; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+avx512bw | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK64 --check-prefix=WIN64
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK64 --check-prefix=LINUXOSX64
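
; The tests below exercise the x86_regcallcc ("regcall") calling convention for
; the AVX-512 mask vector types v64i1, v32i1, v16i1 and v8i1 on three targets:
; 32-bit Windows (X32), 64-bit Windows (WIN64) and 64-bit Linux (LINUXOSX64).
; For each type there are four cases: receiving arguments, passing arguments,
; returning a value, and consuming a returned value.
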
; Test regcall when receiving arguments of v64i1 type
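; As the checks below show, the 32-bit target splits each v64i1 argument into
; two 32-bit halves: the first argument arrives in EAX:ECX, the second in
; EDX:EDI, and the remaining eleven are read from the stack. The 64-bit targets
; pass each mask in a single GPR (Win64 additionally uses R10/R11, Linux uses
; R13) and take the overflow arguments from the stack.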
define dso_local x86_regcallcc i64 @test_argv64i1(<64 x i1> %x0, <64 x i1> %x1, <64 x i1> %x2, <64 x i1> %x3, <64 x i1> %x4, <64 x i1> %x5, <64 x i1> %x6, <64 x i1> %x7, <64 x i1> %x8, <64 x i1> %x9, <64 x i1> %x10, <64 x i1> %x11, <64 x i1> %x12) {
; X32-LABEL: test_argv64i1:
; X32: # %bb.0:
; X32-NEXT: addl %edx, %eax
; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: retl
;
; WIN64-LABEL: test_argv64i1:
; WIN64: # %bb.0:
; WIN64-NEXT: addq %rcx, %rax
; WIN64-NEXT: addq %rdx, %rax
; WIN64-NEXT: addq %rdi, %rax
; WIN64-NEXT: addq %rsi, %rax
; WIN64-NEXT: addq %r8, %rax
; WIN64-NEXT: addq %r9, %rax
; WIN64-NEXT: addq %r10, %rax
; WIN64-NEXT: addq %r11, %rax
; WIN64-NEXT: addq %r12, %rax
; WIN64-NEXT: addq %r14, %rax
; WIN64-NEXT: addq %r15, %rax
; WIN64-NEXT: addq {{[0-9]+}}(%rsp), %rax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argv64i1:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: addq %rcx, %rax
; LINUXOSX64-NEXT: addq %rdx, %rax
; LINUXOSX64-NEXT: addq %rdi, %rax
; LINUXOSX64-NEXT: addq %rsi, %rax
; LINUXOSX64-NEXT: addq %r8, %rax
; LINUXOSX64-NEXT: addq %r9, %rax
; LINUXOSX64-NEXT: addq %r12, %rax
; LINUXOSX64-NEXT: addq %r13, %rax
; LINUXOSX64-NEXT: addq %r14, %rax
; LINUXOSX64-NEXT: addq %r15, %rax
; LINUXOSX64-NEXT: addq {{[0-9]+}}(%rsp), %rax
; LINUXOSX64-NEXT: addq {{[0-9]+}}(%rsp), %rax
; LINUXOSX64-NEXT: retq
%y0 = bitcast <64 x i1> %x0 to i64
%y1 = bitcast <64 x i1> %x1 to i64
%y2 = bitcast <64 x i1> %x2 to i64
%y3 = bitcast <64 x i1> %x3 to i64
%y4 = bitcast <64 x i1> %x4 to i64
%y5 = bitcast <64 x i1> %x5 to i64
%y6 = bitcast <64 x i1> %x6 to i64
%y7 = bitcast <64 x i1> %x7 to i64
%y8 = bitcast <64 x i1> %x8 to i64
%y9 = bitcast <64 x i1> %x9 to i64
%y10 = bitcast <64 x i1> %x10 to i64
%y11 = bitcast <64 x i1> %x11 to i64
%y12 = bitcast <64 x i1> %x12 to i64
%add1 = add i64 %y0, %y1
%add2 = add i64 %add1, %y2
%add3 = add i64 %add2, %y3
%add4 = add i64 %add3, %y4
%add5 = add i64 %add4, %y5
%add6 = add i64 %add5, %y6
%add7 = add i64 %add6, %y7
%add8 = add i64 %add7, %y8
%add9 = add i64 %add8, %y9
%add10 = add i64 %add9, %y10
%add11 = add i64 %add10, %y11
%add12 = add i64 %add11, %y12
ret i64 %add12
}

; Test regcall when passing arguments of v64i1 type
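; On the caller side the constant mask 0x100000002 is materialized once; the
; 32-bit target builds the stack-passed copies with vector stores and loads the
; two register pairs with immediates, while the 64-bit targets simply copy the
; movabsq result into every argument GPR before the call.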
define dso_local i64 @caller_argv64i1() #0 {
; X32-LABEL: caller_argv64i1:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %edi
; X32-NEXT: subl $88, %esp
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [2,1,2,1]
; X32-NEXT: vmovups %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT: vmovaps {{.*#+}} zmm0 = [2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1]
; X32-NEXT: vmovups %zmm0, (%esp)
; X32-NEXT: movl $1, {{[0-9]+}}(%esp)
; X32-NEXT: movl $2, {{[0-9]+}}(%esp)
; X32-NEXT: movl $2, %eax
; X32-NEXT: movl $1, %ecx
; X32-NEXT: movl $2, %edx
; X32-NEXT: movl $1, %edi
; X32-NEXT: vzeroupper
; X32-NEXT: calll _test_argv64i1
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: addl $88, %esp
; X32-NEXT: popl %edi
; X32-NEXT: retl
;
; WIN64-LABEL: caller_argv64i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %r15
; WIN64-NEXT: .seh_pushreg %r15
; WIN64-NEXT: pushq %r14
; WIN64-NEXT: .seh_pushreg %r14
; WIN64-NEXT: pushq %r12
; WIN64-NEXT: .seh_pushreg %r12
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg %rsi
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg %rdi
; WIN64-NEXT: subq $48, %rsp
; WIN64-NEXT: .seh_stackalloc 48
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm7, 32
; WIN64-NEXT: vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm6, 16
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movabsq $4294967298, %rax # imm = 0x100000002
; WIN64-NEXT: movq %rax, (%rsp)
; WIN64-NEXT: movq %rax, %rcx
; WIN64-NEXT: movq %rax, %rdx
; WIN64-NEXT: movq %rax, %rdi
; WIN64-NEXT: movq %rax, %r8
; WIN64-NEXT: movq %rax, %r9
; WIN64-NEXT: movq %rax, %r10
; WIN64-NEXT: movq %rax, %r11
; WIN64-NEXT: movq %rax, %r12
; WIN64-NEXT: movq %rax, %r14
; WIN64-NEXT: movq %rax, %r15
; WIN64-NEXT: movq %rax, %rsi
; WIN64-NEXT: callq test_argv64i1
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $48, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: popq %r12
; WIN64-NEXT: popq %r14
; WIN64-NEXT: popq %r15
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv64i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %r15
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: pushq %r14
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 24
; LINUXOSX64-NEXT: pushq %r13
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT: pushq %r12
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 40
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 48
; LINUXOSX64-NEXT: .cfi_offset %r12, -40
; LINUXOSX64-NEXT: .cfi_offset %r13, -32
; LINUXOSX64-NEXT: .cfi_offset %r14, -24
; LINUXOSX64-NEXT: .cfi_offset %r15, -16
; LINUXOSX64-NEXT: movabsq $4294967298, %rax # imm = 0x100000002
; LINUXOSX64-NEXT: movq %rax, %rcx
; LINUXOSX64-NEXT: movq %rax, %rdx
; LINUXOSX64-NEXT: movq %rax, %rdi
; LINUXOSX64-NEXT: movq %rax, %r8
; LINUXOSX64-NEXT: movq %rax, %r9
; LINUXOSX64-NEXT: movq %rax, %r12
; LINUXOSX64-NEXT: movq %rax, %r13
; LINUXOSX64-NEXT: movq %rax, %r14
; LINUXOSX64-NEXT: movq %rax, %r15
; LINUXOSX64-NEXT: movq %rax, %rsi
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_adjust_cfa_offset 8
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_adjust_cfa_offset 8
; LINUXOSX64-NEXT: callq test_argv64i1
; LINUXOSX64-NEXT: addq $24, %rsp
; LINUXOSX64-NEXT: .cfi_adjust_cfa_offset -24
; LINUXOSX64-NEXT: popq %r12
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT: popq %r13
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 24
; LINUXOSX64-NEXT: popq %r14
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: popq %r15
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
%v0 = bitcast i64 4294967298 to <64 x i1>
%call = call x86_regcallcc i64 @test_argv64i1(<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
<64 x i1> %v0)
ret i64 %call
}

; Test regcall when returning v64i1 type
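; A v64i1 regcall return value comes back split across EAX:ECX on the 32-bit
; target and in RAX on the 64-bit targets, as the immediates below show.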
define dso_local x86_regcallcc <64 x i1> @test_retv64i1() {
; X32-LABEL: test_retv64i1:
; X32: # %bb.0:
; X32-NEXT: movl $2, %eax
; X32-NEXT: movl $1, %ecx
; X32-NEXT: retl
;
; CHECK64-LABEL: test_retv64i1:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movabsq $4294967298, %rax # imm = 0x100000002
; CHECK64-NEXT: retq
%a = bitcast i64 4294967298 to <64 x i1>
ret <64 x i1> %a
}

; Test regcall when processing result of v64i1 type
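; The 32-bit caller reassembles the returned halves with two kmovd and a
; kunpckdq into a single k-register, while the 64-bit callers need only a
; kmovq; the mask is then widened with vpmovm2b, presumably because this
; caller returns <64 x i1> under the default calling convention.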
define dso_local <64 x i1> @caller_retv64i1() #0 {
; X32-LABEL: caller_retv64i1:
; X32: # %bb.0: # %entry
; X32-NEXT: calll _test_retv64i1
; X32-NEXT: kmovd %eax, %k0
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: kunpckdq %k0, %k1, %k0
; X32-NEXT: vpmovm2b %k0, %zmm0
; X32-NEXT: retl
;
; WIN64-LABEL: caller_retv64i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg %rsi
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg %rdi
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: callq test_retv64i1
; WIN64-NEXT: kmovq %rax, %k0
; WIN64-NEXT: vpmovm2b %k0, %zmm0
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv64i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: callq test_retv64i1
; LINUXOSX64-NEXT: kmovq %rax, %k0
; LINUXOSX64-NEXT: vpmovm2b %k0, %zmm0
; LINUXOSX64-NEXT: popq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
%call = call x86_regcallcc <64 x i1> @test_retv64i1()
ret <64 x i1> %call
}

; Test regcall when receiving arguments of v32i1 type
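; v32i1 regcall arguments arrive as plain i32 values in EAX, ECX and EDX. To
; forward them to the non-regcall helper they are moved into k-registers and
; widened to <32 x i8> with vpmovm2b. Note also the spills around the call:
; regcall treats XMM4-XMM7 (32-bit) and XMM8-XMM15 (Linux64) as callee-saved.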
declare i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
define dso_local x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2) {
; X32-LABEL: test_argv32i1:
; X32: # %bb.0: # %entry
; X32-NEXT: subl $76, %esp
; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT: kmovd %edx, %k0
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: vpmovm2b %k2, %zmm0
; X32-NEXT: vpmovm2b %k1, %zmm1
; X32-NEXT: vpmovm2b %k0, %zmm2
; X32-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; X32-NEXT: # kill: def $ymm1 killed $ymm1 killed $zmm1
; X32-NEXT: # kill: def $ymm2 killed $ymm2 killed $zmm2
; X32-NEXT: calll _test_argv32i1helper
; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT: addl $76, %esp
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; WIN64-LABEL: test_argv32i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rbp
; WIN64-NEXT: .seh_pushreg %rbp
; WIN64-NEXT: pushq %r11
; WIN64-NEXT: .seh_pushreg %r11
; WIN64-NEXT: pushq %r10
; WIN64-NEXT: .seh_pushreg %r10
; WIN64-NEXT: subq $128, %rsp
; WIN64-NEXT: .seh_stackalloc 128
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
; WIN64-NEXT: .seh_setframe %rbp, 128
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: andq $-32, %rsp
; WIN64-NEXT: kmovd %edx, %k0
; WIN64-NEXT: kmovd %eax, %k1
; WIN64-NEXT: kmovd %ecx, %k2
; WIN64-NEXT: vpmovm2b %k2, %zmm0
; WIN64-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT: vpmovm2b %k1, %zmm0
; WIN64-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT: vpmovm2b %k0, %zmm0
; WIN64-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %r8
; WIN64-NEXT: vzeroupper
; WIN64-NEXT: callq test_argv32i1helper
; WIN64-NEXT: movq %rbp, %rsp
; WIN64-NEXT: popq %r10
; WIN64-NEXT: popq %r11
; WIN64-NEXT: popq %rbp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_argv32i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: subq $136, %rsp
; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT: .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT: .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT: .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT: .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT: .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT: .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT: .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT: .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT: kmovd %edx, %k0
; LINUXOSX64-NEXT: kmovd %ecx, %k1
; LINUXOSX64-NEXT: kmovd %eax, %k2
; LINUXOSX64-NEXT: vpmovm2b %k2, %zmm0
; LINUXOSX64-NEXT: vpmovm2b %k1, %zmm1
; LINUXOSX64-NEXT: vpmovm2b %k0, %zmm2
; LINUXOSX64-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; LINUXOSX64-NEXT: # kill: def $ymm1 killed $ymm1 killed $zmm1
; LINUXOSX64-NEXT: # kill: def $ymm2 killed $ymm2 killed $zmm2
; LINUXOSX64-NEXT: callq test_argv32i1helper@PLT
; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT: addq $136, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: vzeroupper
; LINUXOSX64-NEXT: retq
entry:
%res = call i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
ret i32 %res
}

; Test regcall when passing arguments of v32i1 type
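; Passing v32i1 values is cheap for the caller: bitcasting i32 1 to <32 x i1>
; reduces to loading the immediate 1 into each of the three argument registers.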
define dso_local i32 @caller_argv32i1() #0 {
; X32-LABEL: caller_argv32i1:
; X32: # %bb.0: # %entry
; X32-NEXT: movl $1, %eax
; X32-NEXT: movl $1, %ecx
; X32-NEXT: movl $1, %edx
; X32-NEXT: calll _test_argv32i1
; X32-NEXT: retl
;
; WIN64-LABEL: caller_argv32i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg %rsi
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg %rdi
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movl $1, %eax
; WIN64-NEXT: movl $1, %ecx
; WIN64-NEXT: movl $1, %edx
; WIN64-NEXT: callq test_argv32i1
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv32i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: movl $1, %eax
; LINUXOSX64-NEXT: movl $1, %ecx
; LINUXOSX64-NEXT: movl $1, %edx
; LINUXOSX64-NEXT: callq test_argv32i1
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
%v0 = bitcast i32 1 to <32 x i1>
%call = call x86_regcallcc i32 @test_argv32i1(<32 x i1> %v0, <32 x i1> %v0, <32 x i1> %v0)
ret i32 %call
}

; Test regcall when returning v32i1 type
define dso_local x86_regcallcc <32 x i1> @test_retv32i1() {
; X32-LABEL: test_retv32i1:
; X32: # %bb.0:
; X32-NEXT: movl $1, %eax
; X32-NEXT: retl
;
; CHECK64-LABEL: test_retv32i1:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl $1, %eax
; CHECK64-NEXT: retq
%a = bitcast i32 1 to <32 x i1>
ret <32 x i1> %a
}

; Test regcall when processing result of v32i1 type
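; A v32i1 regcall result is returned directly in EAX, so the caller can operate
; on it as an i32 (the incl below) without any mask-register round trip.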
define dso_local i32 @caller_retv32i1() #0 {
; X32-LABEL: caller_retv32i1:
; X32: # %bb.0: # %entry
; X32-NEXT: calll _test_retv32i1
; X32-NEXT: incl %eax
; X32-NEXT: retl
;
; WIN64-LABEL: caller_retv32i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg %rsi
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg %rdi
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: callq test_retv32i1
; WIN64-NEXT: incl %eax
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv32i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: callq test_retv32i1
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
%call = call x86_regcallcc <32 x i1> @test_retv32i1()
%c = bitcast <32 x i1> %call to i32
ret i32 %c
}

; Test regcall when receiving arguments of v16i1 type
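; v16i1 follows the same pattern as v32i1, but the masks are widened to
; <16 x i8> values in XMM registers before the helper call. On WIN64 the three
; XMM arguments are passed indirectly: each vector is stored to the stack and
; its address is handed over in RCX, RDX and R8.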
declare i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
define dso_local x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2) {
; X32-LABEL: test_argv16i1:
; X32: # %bb.0:
; X32-NEXT: subl $76, %esp
; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT: kmovd %edx, %k0
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: vpmovm2b %k2, %zmm0
; X32-NEXT: vpmovm2b %k1, %zmm1
; X32-NEXT: vpmovm2b %k0, %zmm2
; X32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
; X32-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
; X32-NEXT: vzeroupper
; X32-NEXT: calll _test_argv16i1helper
; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT: addl $76, %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_argv16i1:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %r11
; WIN64-NEXT: .seh_pushreg %r11
; WIN64-NEXT: pushq %r10
; WIN64-NEXT: .seh_pushreg %r10
; WIN64-NEXT: subq $88, %rsp
; WIN64-NEXT: .seh_stackalloc 88
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: kmovd %edx, %k0
; WIN64-NEXT: kmovd %eax, %k1
; WIN64-NEXT: kmovd %ecx, %k2
; WIN64-NEXT: vpmovm2b %k2, %zmm0
; WIN64-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT: vpmovm2b %k1, %zmm0
; WIN64-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT: vpmovm2b %k0, %zmm0
; WIN64-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %r8
; WIN64-NEXT: vzeroupper
; WIN64-NEXT: callq test_argv16i1helper
; WIN64-NEXT: addq $88, %rsp
; WIN64-NEXT: popq %r10
; WIN64-NEXT: popq %r11
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_argv16i1:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: subq $136, %rsp
; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT: .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT: .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT: .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT: .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT: .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT: .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT: .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT: .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT: kmovd %edx, %k0
; LINUXOSX64-NEXT: kmovd %ecx, %k1
; LINUXOSX64-NEXT: kmovd %eax, %k2
; LINUXOSX64-NEXT: vpmovm2b %k2, %zmm0
; LINUXOSX64-NEXT: vpmovm2b %k1, %zmm1
; LINUXOSX64-NEXT: vpmovm2b %k0, %zmm2
; LINUXOSX64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
; LINUXOSX64-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
; LINUXOSX64-NEXT: vzeroupper
; LINUXOSX64-NEXT: callq test_argv16i1helper@PLT
; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT: addq $136, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
%res = call i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
ret i16 %res
}

; Test regcall when passing arguments of v16i1 type
define dso_local i16 @caller_argv16i1() #0 {
; X32-LABEL: caller_argv16i1:
; X32: # %bb.0: # %entry
; X32-NEXT: movl $1, %eax
; X32-NEXT: movl $1, %ecx
; X32-NEXT: movl $1, %edx
; X32-NEXT: calll _test_argv16i1
; X32-NEXT: retl
;
; WIN64-LABEL: caller_argv16i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg %rsi
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg %rdi
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movl $1, %eax
; WIN64-NEXT: movl $1, %ecx
; WIN64-NEXT: movl $1, %edx
; WIN64-NEXT: callq test_argv16i1
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv16i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: movl $1, %eax
; LINUXOSX64-NEXT: movl $1, %ecx
; LINUXOSX64-NEXT: movl $1, %edx
; LINUXOSX64-NEXT: callq test_argv16i1
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
%v0 = bitcast i16 1 to <16 x i1>
%call = call x86_regcallcc i16 @test_argv16i1(<16 x i1> %v0, <16 x i1> %v0, <16 x i1> %v0)
ret i16 %call
}

; Test regcall when returning v16i1 type
define dso_local x86_regcallcc <16 x i1> @test_retv16i1() {
; X32-LABEL: test_retv16i1:
; X32: # %bb.0:
; X32-NEXT: movw $1, %ax
; X32-NEXT: retl
;
; CHECK64-LABEL: test_retv16i1:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movw $1, %ax
; CHECK64-NEXT: retq
%a = bitcast i16 1 to <16 x i1>
ret <16 x i1> %a
}

; Test regcall when processing result of v16i1 type
define dso_local i16 @caller_retv16i1() #0 {
; X32-LABEL: caller_retv16i1:
; X32: # %bb.0: # %entry
; X32-NEXT: calll _test_retv16i1
; X32-NEXT: # kill: def $ax killed $ax def $eax
; X32-NEXT: incl %eax
; X32-NEXT: # kill: def $ax killed $ax killed $eax
; X32-NEXT: retl
;
; WIN64-LABEL: caller_retv16i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg %rsi
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg %rdi
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: callq test_retv16i1
; WIN64-NEXT: # kill: def $ax killed $ax def $eax
; WIN64-NEXT: incl %eax
; WIN64-NEXT: # kill: def $ax killed $ax killed $eax
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv16i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: callq test_retv16i1
; LINUXOSX64-NEXT: # kill: def $ax killed $ax def $eax
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: # kill: def $ax killed $ax killed $eax
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
%call = call x86_regcallcc <16 x i1> @test_retv16i1()
%c = bitcast <16 x i1> %call to i16
ret i16 %c
}

; Test regcall when receiving arguments of v8i1 type
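; For v8i1 the mask elements are promoted to i16, so vpmovm2w (rather than
; vpmovm2b) is used to widen each mask to an <8 x i16> value in an XMM register
; before calling the helper.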
declare i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
define dso_local x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2) {
; X32-LABEL: test_argv8i1:
; X32: # %bb.0:
; X32-NEXT: subl $76, %esp
; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT: kmovd %edx, %k0
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: vpmovm2w %k2, %zmm0
; X32-NEXT: vpmovm2w %k1, %zmm1
; X32-NEXT: vpmovm2w %k0, %zmm2
; X32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
; X32-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
; X32-NEXT: vzeroupper
; X32-NEXT: calll _test_argv8i1helper
; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT: addl $76, %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_argv8i1:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %r11
; WIN64-NEXT: .seh_pushreg %r11
; WIN64-NEXT: pushq %r10
; WIN64-NEXT: .seh_pushreg %r10
; WIN64-NEXT: subq $88, %rsp
; WIN64-NEXT: .seh_stackalloc 88
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: kmovd %edx, %k0
; WIN64-NEXT: kmovd %eax, %k1
; WIN64-NEXT: kmovd %ecx, %k2
; WIN64-NEXT: vpmovm2w %k2, %zmm0
; WIN64-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT: vpmovm2w %k1, %zmm0
; WIN64-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT: vpmovm2w %k0, %zmm0
; WIN64-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %r8
; WIN64-NEXT: vzeroupper
; WIN64-NEXT: callq test_argv8i1helper
; WIN64-NEXT: addq $88, %rsp
; WIN64-NEXT: popq %r10
; WIN64-NEXT: popq %r11
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_argv8i1:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: subq $136, %rsp
; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT: .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT: .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT: .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT: .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT: .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT: .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT: .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT: .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT: kmovd %edx, %k0
; LINUXOSX64-NEXT: kmovd %ecx, %k1
; LINUXOSX64-NEXT: kmovd %eax, %k2
; LINUXOSX64-NEXT: vpmovm2w %k2, %zmm0
; LINUXOSX64-NEXT: vpmovm2w %k1, %zmm1
; LINUXOSX64-NEXT: vpmovm2w %k0, %zmm2
; LINUXOSX64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
; LINUXOSX64-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
; LINUXOSX64-NEXT: vzeroupper
; LINUXOSX64-NEXT: callq test_argv8i1helper@PLT
; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT: addq $136, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
%res = call i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
ret i8 %res
}

; Test regcall when passing arguments of v8i1 type
define dso_local i8 @caller_argv8i1() #0 {
; X32-LABEL: caller_argv8i1:
; X32: # %bb.0: # %entry
; X32-NEXT: movl $1, %eax
; X32-NEXT: movl $1, %ecx
; X32-NEXT: movl $1, %edx
; X32-NEXT: calll _test_argv8i1
; X32-NEXT: retl
;
; WIN64-LABEL: caller_argv8i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg %rsi
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg %rdi
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movl $1, %eax
; WIN64-NEXT: movl $1, %ecx
; WIN64-NEXT: movl $1, %edx
; WIN64-NEXT: callq test_argv8i1
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv8i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: movl $1, %eax
; LINUXOSX64-NEXT: movl $1, %ecx
; LINUXOSX64-NEXT: movl $1, %edx
; LINUXOSX64-NEXT: callq test_argv8i1
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
%v0 = bitcast i8 1 to <8 x i1>
%call = call x86_regcallcc i8 @test_argv8i1(<8 x i1> %v0, <8 x i1> %v0, <8 x i1> %v0)
ret i8 %call
}

; Test regcall when returning v8i1 type
define dso_local x86_regcallcc <8 x i1> @test_retv8i1() {
; X32-LABEL: test_retv8i1:
; X32: # %bb.0:
; X32-NEXT: movb $1, %al
; X32-NEXT: retl
;
; CHECK64-LABEL: test_retv8i1:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movb $1, %al
; CHECK64-NEXT: retq
%a = bitcast i8 1 to <8 x i1>
ret <8 x i1> %a
}

; Test regcall when processing result of v8i1 type
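; The <8 x i1> regcall result comes back in AL; the caller moves it into a
; k-register and widens it with vpmovm2w, apparently because its own
; (default CC) <8 x i1> return type is promoted to <8 x i16> in XMM0.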
define dso_local <8 x i1> @caller_retv8i1() #0 {
; X32-LABEL: caller_retv8i1:
; X32: # %bb.0: # %entry
; X32-NEXT: calll _test_retv8i1
; X32-NEXT: # kill: def $al killed $al def $eax
; X32-NEXT: kmovd %eax, %k0
; X32-NEXT: vpmovm2w %k0, %zmm0
; X32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; WIN64-LABEL: caller_retv8i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg %rsi
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg %rdi
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: callq test_retv8i1
; WIN64-NEXT: # kill: def $al killed $al def $eax
; WIN64-NEXT: kmovd %eax, %k0
; WIN64-NEXT: vpmovm2w %k0, %zmm0
; WIN64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: vzeroupper
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv8i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: callq test_retv8i1
; LINUXOSX64-NEXT: # kill: def $al killed $al def $eax
; LINUXOSX64-NEXT: kmovd %eax, %k0
; LINUXOSX64-NEXT: vpmovm2w %k0, %zmm0
; LINUXOSX64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT: popq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: vzeroupper
; LINUXOSX64-NEXT: retq
entry:
%call = call x86_regcallcc <8 x i1> @test_retv8i1()
ret <8 x i1> %call
}