; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+avx512bw | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK64 --check-prefix=WIN64
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK64 --check-prefix=LINUXOSX64

; Test regcall when receiving arguments of v64i1 type
define x86_regcallcc i64 @test_argv64i1(<64 x i1> %x0, <64 x i1> %x1, <64 x i1> %x2, <64 x i1> %x3, <64 x i1> %x4, <64 x i1> %x5, <64 x i1> %x6, <64 x i1> %x7, <64 x i1> %x8, <64 x i1> %x9, <64 x i1> %x10, <64 x i1> %x11, <64 x i1> %x12) {
; X32-LABEL: test_argv64i1:
; X32: # %bb.0:
; X32-NEXT: addl %edx, %eax
; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: retl
;
; WIN64-LABEL: test_argv64i1:
; WIN64: # %bb.0:
; WIN64-NEXT: addq %rcx, %rax
; WIN64-NEXT: addq %rdx, %rax
; WIN64-NEXT: addq %rdi, %rax
; WIN64-NEXT: addq %rsi, %rax
; WIN64-NEXT: addq %r8, %rax
; WIN64-NEXT: addq %r9, %rax
; WIN64-NEXT: addq %r10, %rax
; WIN64-NEXT: addq %r11, %rax
; WIN64-NEXT: addq %r12, %rax
; WIN64-NEXT: addq %r14, %rax
; WIN64-NEXT: addq %r15, %rax
; WIN64-NEXT: addq {{[0-9]+}}(%rsp), %rax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argv64i1:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: addq %rcx, %rax
; LINUXOSX64-NEXT: addq %rdx, %rax
; LINUXOSX64-NEXT: addq %rdi, %rax
; LINUXOSX64-NEXT: addq %rsi, %rax
; LINUXOSX64-NEXT: addq %r8, %rax
; LINUXOSX64-NEXT: addq %r9, %rax
; LINUXOSX64-NEXT: addq %r12, %rax
; LINUXOSX64-NEXT: addq %r13, %rax
; LINUXOSX64-NEXT: addq %r14, %rax
; LINUXOSX64-NEXT: addq %r15, %rax
; LINUXOSX64-NEXT: addq {{[0-9]+}}(%rsp), %rax
; LINUXOSX64-NEXT: addq {{[0-9]+}}(%rsp), %rax
; LINUXOSX64-NEXT: retq
%y0 = bitcast <64 x i1> %x0 to i64
%y1 = bitcast <64 x i1> %x1 to i64
%y2 = bitcast <64 x i1> %x2 to i64
%y3 = bitcast <64 x i1> %x3 to i64
%y4 = bitcast <64 x i1> %x4 to i64
%y5 = bitcast <64 x i1> %x5 to i64
%y6 = bitcast <64 x i1> %x6 to i64
%y7 = bitcast <64 x i1> %x7 to i64
%y8 = bitcast <64 x i1> %x8 to i64
%y9 = bitcast <64 x i1> %x9 to i64
%y10 = bitcast <64 x i1> %x10 to i64
%y11 = bitcast <64 x i1> %x11 to i64
%y12 = bitcast <64 x i1> %x12 to i64
%add1 = add i64 %y0, %y1
%add2 = add i64 %add1, %y2
%add3 = add i64 %add2, %y3
%add4 = add i64 %add3, %y4
%add5 = add i64 %add4, %y5
%add6 = add i64 %add5, %y6
%add7 = add i64 %add6, %y7
%add8 = add i64 %add7, %y8
%add9 = add i64 %add8, %y9
%add10 = add i64 %add9, %y10
%add11 = add i64 %add10, %y11
%add12 = add i64 %add11, %y12
ret i64 %add12
}

; Test regcall when passing arguments of v64i1 type
define i64 @caller_argv64i1() #0 {
; X32-LABEL: caller_argv64i1:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %edi
; X32-NEXT: subl $88, %esp
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [2,1,2,1]
; X32-NEXT: vmovups %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT: vmovaps {{.*#+}} zmm0 = [2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1]
; X32-NEXT: vmovups %zmm0, (%esp)
; X32-NEXT: movl $1, {{[0-9]+}}(%esp)
; X32-NEXT: movl $2, {{[0-9]+}}(%esp)
; X32-NEXT: movl $2, %eax
; X32-NEXT: movl $1, %ecx
; X32-NEXT: movl $2, %edx
; X32-NEXT: movl $1, %edi
; X32-NEXT: vzeroupper
; X32-NEXT: calll _test_argv64i1
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: addl $88, %esp
; X32-NEXT: popl %edi
; X32-NEXT: retl
;
; WIN64-LABEL: caller_argv64i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %r15
; WIN64-NEXT: .seh_pushreg 15
; WIN64-NEXT: pushq %r14
; WIN64-NEXT: .seh_pushreg 14
; WIN64-NEXT: pushq %r12
; WIN64-NEXT: .seh_pushreg 12
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg 6
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $48, %rsp
; WIN64-NEXT: .seh_stackalloc 48
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 32
; WIN64-NEXT: vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 16
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movabsq $4294967298, %rax # imm = 0x100000002
; WIN64-NEXT: movq %rax, (%rsp)
; WIN64-NEXT: movq %rax, %rcx
; WIN64-NEXT: movq %rax, %rdx
; WIN64-NEXT: movq %rax, %rdi
; WIN64-NEXT: movq %rax, %r8
; WIN64-NEXT: movq %rax, %r9
; WIN64-NEXT: movq %rax, %r10
; WIN64-NEXT: movq %rax, %r11
; WIN64-NEXT: movq %rax, %r12
; WIN64-NEXT: movq %rax, %r14
; WIN64-NEXT: movq %rax, %r15
; WIN64-NEXT: movq %rax, %rsi
; WIN64-NEXT: callq test_argv64i1
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $48, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: popq %r12
; WIN64-NEXT: popq %r14
; WIN64-NEXT: popq %r15
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv64i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %r15
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: pushq %r14
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 24
; LINUXOSX64-NEXT: pushq %r13
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT: pushq %r12
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 40
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 48
; LINUXOSX64-NEXT: .cfi_offset %r12, -40
; LINUXOSX64-NEXT: .cfi_offset %r13, -32
; LINUXOSX64-NEXT: .cfi_offset %r14, -24
; LINUXOSX64-NEXT: .cfi_offset %r15, -16
; LINUXOSX64-NEXT: movabsq $4294967298, %rax # imm = 0x100000002
; LINUXOSX64-NEXT: movq %rax, %rcx
; LINUXOSX64-NEXT: movq %rax, %rdx
; LINUXOSX64-NEXT: movq %rax, %rdi
; LINUXOSX64-NEXT: movq %rax, %r8
; LINUXOSX64-NEXT: movq %rax, %r9
; LINUXOSX64-NEXT: movq %rax, %r12
; LINUXOSX64-NEXT: movq %rax, %r13
; LINUXOSX64-NEXT: movq %rax, %r14
; LINUXOSX64-NEXT: movq %rax, %r15
; LINUXOSX64-NEXT: movq %rax, %rsi
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_adjust_cfa_offset 8
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_adjust_cfa_offset 8
; LINUXOSX64-NEXT: callq test_argv64i1
; LINUXOSX64-NEXT: addq $24, %rsp
; LINUXOSX64-NEXT: .cfi_adjust_cfa_offset -24
; LINUXOSX64-NEXT: popq %r12
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT: popq %r13
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 24
; LINUXOSX64-NEXT: popq %r14
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: popq %r15
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
%v0 = bitcast i64 4294967298 to <64 x i1>
%call = call x86_regcallcc i64 @test_argv64i1(<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
<64 x i1> %v0)
ret i64 %call
}

; Test regcall when returning v64i1 type
define x86_regcallcc <64 x i1> @test_retv64i1() {
; X32-LABEL: test_retv64i1:
; X32: # %bb.0:
; X32-NEXT: movl $2, %eax
; X32-NEXT: movl $1, %ecx
; X32-NEXT: retl
;
; CHECK64-LABEL: test_retv64i1:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movabsq $4294967298, %rax # imm = 0x100000002
; CHECK64-NEXT: retq
%a = bitcast i64 4294967298 to <64 x i1>
ret <64 x i1> %a
}

; Test regcall when processing result of v64i1 type
define <64 x i1> @caller_retv64i1() #0 {
; X32-LABEL: caller_retv64i1:
; X32: # %bb.0: # %entry
; X32-NEXT: calll _test_retv64i1
; X32-NEXT: kmovd %eax, %k0
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: kunpckdq %k0, %k1, %k0
; X32-NEXT: vpmovm2b %k0, %zmm0
; X32-NEXT: retl
;
; WIN64-LABEL: caller_retv64i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg 6
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: callq test_retv64i1
; WIN64-NEXT: kmovq %rax, %k0
; WIN64-NEXT: vpmovm2b %k0, %zmm0
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv64i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: callq test_retv64i1
; LINUXOSX64-NEXT: kmovq %rax, %k0
; LINUXOSX64-NEXT: vpmovm2b %k0, %zmm0
; LINUXOSX64-NEXT: popq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
%call = call x86_regcallcc <64 x i1> @test_retv64i1()
ret <64 x i1> %call
}

; Test regcall when receiving arguments of v32i1 type
declare i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
define x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2) {
; X32-LABEL: test_argv32i1:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %esp
; X32-NEXT: subl $72, %esp
; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT: kmovd %edx, %k0
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: vpmovm2b %k2, %zmm0
; X32-NEXT: vpmovm2b %k1, %zmm1
; X32-NEXT: vpmovm2b %k0, %zmm2
; X32-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; X32-NEXT: # kill: def $ymm1 killed $ymm1 killed $zmm1
; X32-NEXT: # kill: def $ymm2 killed $ymm2 killed $zmm2
; X32-NEXT: calll _test_argv32i1helper
; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT: addl $72, %esp
; X32-NEXT: popl %esp
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; WIN64-LABEL: test_argv32i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %r11
; WIN64-NEXT: .seh_pushreg 11
; WIN64-NEXT: pushq %r10
; WIN64-NEXT: .seh_pushreg 10
; WIN64-NEXT: pushq %rsp
; WIN64-NEXT: .seh_pushreg 4
; WIN64-NEXT: subq $32, %rsp
; WIN64-NEXT: .seh_stackalloc 32
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: kmovd %edx, %k0
; WIN64-NEXT: kmovd %ecx, %k1
; WIN64-NEXT: kmovd %eax, %k2
; WIN64-NEXT: vpmovm2b %k2, %zmm0
; WIN64-NEXT: vpmovm2b %k1, %zmm1
; WIN64-NEXT: vpmovm2b %k0, %zmm2
; WIN64-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; WIN64-NEXT: # kill: def $ymm1 killed $ymm1 killed $zmm1
; WIN64-NEXT: # kill: def $ymm2 killed $ymm2 killed $zmm2
; WIN64-NEXT: callq test_argv32i1helper
; WIN64-NEXT: addq $32, %rsp
; WIN64-NEXT: popq %rsp
; WIN64-NEXT: popq %r10
; WIN64-NEXT: popq %r11
; WIN64-NEXT: vzeroupper
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_argv32i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: subq $128, %rsp
; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT: .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT: .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT: .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT: .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT: .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT: .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT: .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT: kmovd %edx, %k0
; LINUXOSX64-NEXT: kmovd %ecx, %k1
; LINUXOSX64-NEXT: kmovd %eax, %k2
; LINUXOSX64-NEXT: vpmovm2b %k2, %zmm0
; LINUXOSX64-NEXT: vpmovm2b %k1, %zmm1
; LINUXOSX64-NEXT: vpmovm2b %k0, %zmm2
; LINUXOSX64-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; LINUXOSX64-NEXT: # kill: def $ymm1 killed $ymm1 killed $zmm1
; LINUXOSX64-NEXT: # kill: def $ymm2 killed $ymm2 killed $zmm2
; LINUXOSX64-NEXT: callq test_argv32i1helper
; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT: addq $128, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: popq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: vzeroupper
; LINUXOSX64-NEXT: retq
entry:
%res = call i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
ret i32 %res
}

; Test regcall when passing arguments of v32i1 type
define i32 @caller_argv32i1() #0 {
; X32-LABEL: caller_argv32i1:
; X32: # %bb.0: # %entry
; X32-NEXT: movl $1, %eax
; X32-NEXT: movl $1, %ecx
; X32-NEXT: movl $1, %edx
; X32-NEXT: calll _test_argv32i1
; X32-NEXT: retl
;
; WIN64-LABEL: caller_argv32i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg 6
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movl $1, %eax
; WIN64-NEXT: movl $1, %ecx
; WIN64-NEXT: movl $1, %edx
; WIN64-NEXT: callq test_argv32i1
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv32i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: movl $1, %eax
; LINUXOSX64-NEXT: movl $1, %ecx
; LINUXOSX64-NEXT: movl $1, %edx
; LINUXOSX64-NEXT: callq test_argv32i1
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
%v0 = bitcast i32 1 to <32 x i1>
%call = call x86_regcallcc i32 @test_argv32i1(<32 x i1> %v0, <32 x i1> %v0, <32 x i1> %v0)
ret i32 %call
}

; Test regcall when returning v32i1 type
define x86_regcallcc <32 x i1> @test_retv32i1() {
; X32-LABEL: test_retv32i1:
; X32: # %bb.0:
; X32-NEXT: movl $1, %eax
; X32-NEXT: retl
;
; CHECK64-LABEL: test_retv32i1:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl $1, %eax
; CHECK64-NEXT: retq
%a = bitcast i32 1 to <32 x i1>
ret <32 x i1> %a
}

; Test regcall when processing result of v32i1 type
define i32 @caller_retv32i1() #0 {
; X32-LABEL: caller_retv32i1:
; X32: # %bb.0: # %entry
; X32-NEXT: calll _test_retv32i1
; X32-NEXT: incl %eax
; X32-NEXT: retl
;
; WIN64-LABEL: caller_retv32i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg 6
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: callq test_retv32i1
; WIN64-NEXT: incl %eax
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv32i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: callq test_retv32i1
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
%call = call x86_regcallcc <32 x i1> @test_retv32i1()
%c = bitcast <32 x i1> %call to i32
%add = add i32 %c, 1
ret i32 %add
}

; Test regcall when receiving arguments of v16i1 type
declare i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
define x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2) {
; X32-LABEL: test_argv16i1:
; X32: # %bb.0:
; X32-NEXT: pushl %esp
; X32-NEXT: subl $72, %esp
; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT: kmovd %edx, %k0
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: vpmovm2b %k2, %zmm0
; X32-NEXT: vpmovm2b %k1, %zmm1
; X32-NEXT: vpmovm2b %k0, %zmm2
; X32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
; X32-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
; X32-NEXT: vzeroupper
; X32-NEXT: calll _test_argv16i1helper
; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT: addl $72, %esp
; X32-NEXT: popl %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_argv16i1:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %r11
; WIN64-NEXT: .seh_pushreg 11
; WIN64-NEXT: pushq %r10
; WIN64-NEXT: .seh_pushreg 10
; WIN64-NEXT: pushq %rsp
; WIN64-NEXT: .seh_pushreg 4
; WIN64-NEXT: subq $32, %rsp
; WIN64-NEXT: .seh_stackalloc 32
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: kmovd %edx, %k0
; WIN64-NEXT: kmovd %ecx, %k1
; WIN64-NEXT: kmovd %eax, %k2
; WIN64-NEXT: vpmovm2b %k2, %zmm0
; WIN64-NEXT: vpmovm2b %k1, %zmm1
; WIN64-NEXT: vpmovm2b %k0, %zmm2
; WIN64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; WIN64-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
; WIN64-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
; WIN64-NEXT: vzeroupper
; WIN64-NEXT: callq test_argv16i1helper
; WIN64-NEXT: addq $32, %rsp
; WIN64-NEXT: popq %rsp
; WIN64-NEXT: popq %r10
; WIN64-NEXT: popq %r11
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_argv16i1:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: subq $128, %rsp
; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT: .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT: .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT: .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT: .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT: .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT: .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT: .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT: kmovd %edx, %k0
; LINUXOSX64-NEXT: kmovd %ecx, %k1
; LINUXOSX64-NEXT: kmovd %eax, %k2
; LINUXOSX64-NEXT: vpmovm2b %k2, %zmm0
; LINUXOSX64-NEXT: vpmovm2b %k1, %zmm1
; LINUXOSX64-NEXT: vpmovm2b %k0, %zmm2
; LINUXOSX64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
; LINUXOSX64-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
; LINUXOSX64-NEXT: vzeroupper
; LINUXOSX64-NEXT: callq test_argv16i1helper
; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT: addq $128, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: popq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
%res = call i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
ret i16 %res
}

; Test regcall when passing arguments of v16i1 type
define i16 @caller_argv16i1() #0 {
; X32-LABEL: caller_argv16i1:
; X32: # %bb.0: # %entry
; X32-NEXT: movl $1, %eax
; X32-NEXT: movl $1, %ecx
; X32-NEXT: movl $1, %edx
; X32-NEXT: calll _test_argv16i1
; X32-NEXT: retl
;
; WIN64-LABEL: caller_argv16i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg 6
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movl $1, %eax
; WIN64-NEXT: movl $1, %ecx
; WIN64-NEXT: movl $1, %edx
; WIN64-NEXT: callq test_argv16i1
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv16i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: movl $1, %eax
; LINUXOSX64-NEXT: movl $1, %ecx
; LINUXOSX64-NEXT: movl $1, %edx
; LINUXOSX64-NEXT: callq test_argv16i1
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
%v0 = bitcast i16 1 to <16 x i1>
%call = call x86_regcallcc i16 @test_argv16i1(<16 x i1> %v0, <16 x i1> %v0, <16 x i1> %v0)
ret i16 %call
}

; Test regcall when returning v16i1 type
define x86_regcallcc <16 x i1> @test_retv16i1() {
; X32-LABEL: test_retv16i1:
; X32: # %bb.0:
; X32-NEXT: movw $1, %ax
; X32-NEXT: retl
;
; CHECK64-LABEL: test_retv16i1:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movw $1, %ax
; CHECK64-NEXT: retq
%a = bitcast i16 1 to <16 x i1>
ret <16 x i1> %a
}

; Test regcall when processing result of v16i1 type
define i16 @caller_retv16i1() #0 {
; X32-LABEL: caller_retv16i1:
; X32: # %bb.0: # %entry
; X32-NEXT: calll _test_retv16i1
; X32-NEXT: # kill: def $ax killed $ax def $eax
; X32-NEXT: incl %eax
; X32-NEXT: # kill: def $ax killed $ax killed $eax
; X32-NEXT: retl
;
; WIN64-LABEL: caller_retv16i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg 6
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: callq test_retv16i1
; WIN64-NEXT: # kill: def $ax killed $ax def $eax
; WIN64-NEXT: incl %eax
; WIN64-NEXT: # kill: def $ax killed $ax killed $eax
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv16i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: callq test_retv16i1
; LINUXOSX64-NEXT: # kill: def $ax killed $ax def $eax
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: # kill: def $ax killed $ax killed $eax
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
%call = call x86_regcallcc <16 x i1> @test_retv16i1()
%c = bitcast <16 x i1> %call to i16
%add = add i16 %c, 1
ret i16 %add
}

; Test regcall when receiving arguments of v8i1 type
declare i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
define x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2) {
; X32-LABEL: test_argv8i1:
; X32: # %bb.0:
; X32-NEXT: pushl %esp
; X32-NEXT: subl $72, %esp
; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT: kmovd %edx, %k0
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: vpmovm2w %k2, %zmm0
; X32-NEXT: vpmovm2w %k1, %zmm1
; X32-NEXT: vpmovm2w %k0, %zmm2
; X32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
; X32-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
; X32-NEXT: vzeroupper
; X32-NEXT: calll _test_argv8i1helper
; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT: addl $72, %esp
; X32-NEXT: popl %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_argv8i1:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %r11
; WIN64-NEXT: .seh_pushreg 11
; WIN64-NEXT: pushq %r10
; WIN64-NEXT: .seh_pushreg 10
; WIN64-NEXT: pushq %rsp
; WIN64-NEXT: .seh_pushreg 4
; WIN64-NEXT: subq $32, %rsp
; WIN64-NEXT: .seh_stackalloc 32
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: kmovd %edx, %k0
; WIN64-NEXT: kmovd %ecx, %k1
; WIN64-NEXT: kmovd %eax, %k2
; WIN64-NEXT: vpmovm2w %k2, %zmm0
; WIN64-NEXT: vpmovm2w %k1, %zmm1
; WIN64-NEXT: vpmovm2w %k0, %zmm2
; WIN64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; WIN64-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
; WIN64-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
; WIN64-NEXT: vzeroupper
; WIN64-NEXT: callq test_argv8i1helper
; WIN64-NEXT: addq $32, %rsp
; WIN64-NEXT: popq %rsp
; WIN64-NEXT: popq %r10
; WIN64-NEXT: popq %r11
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_argv8i1:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: subq $128, %rsp
; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT: .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT: .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT: .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT: .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT: .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT: .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT: .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT: kmovd %edx, %k0
; LINUXOSX64-NEXT: kmovd %ecx, %k1
; LINUXOSX64-NEXT: kmovd %eax, %k2
; LINUXOSX64-NEXT: vpmovm2w %k2, %zmm0
; LINUXOSX64-NEXT: vpmovm2w %k1, %zmm1
; LINUXOSX64-NEXT: vpmovm2w %k0, %zmm2
; LINUXOSX64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
; LINUXOSX64-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
; LINUXOSX64-NEXT: vzeroupper
; LINUXOSX64-NEXT: callq test_argv8i1helper
; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT: addq $128, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: popq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
%res = call i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
ret i8 %res
}

; Test regcall when passing arguments of v8i1 type
define i8 @caller_argv8i1() #0 {
; X32-LABEL: caller_argv8i1:
; X32: # %bb.0: # %entry
; X32-NEXT: movl $1, %eax
; X32-NEXT: movl $1, %ecx
; X32-NEXT: movl $1, %edx
; X32-NEXT: calll _test_argv8i1
; X32-NEXT: retl
;
; WIN64-LABEL: caller_argv8i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg 6
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movl $1, %eax
; WIN64-NEXT: movl $1, %ecx
; WIN64-NEXT: movl $1, %edx
; WIN64-NEXT: callq test_argv8i1
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv8i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: movl $1, %eax
; LINUXOSX64-NEXT: movl $1, %ecx
; LINUXOSX64-NEXT: movl $1, %edx
; LINUXOSX64-NEXT: callq test_argv8i1
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
%v0 = bitcast i8 1 to <8 x i1>
%call = call x86_regcallcc i8 @test_argv8i1(<8 x i1> %v0, <8 x i1> %v0, <8 x i1> %v0)
ret i8 %call
}

; Test regcall when returning v8i1 type
define x86_regcallcc <8 x i1> @test_retv8i1() {
; X32-LABEL: test_retv8i1:
; X32: # %bb.0:
; X32-NEXT: movb $1, %al
; X32-NEXT: retl
;
; CHECK64-LABEL: test_retv8i1:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movb $1, %al
; CHECK64-NEXT: retq
%a = bitcast i8 1 to <8 x i1>
ret <8 x i1> %a
}

; Test regcall when processing result of v8i1 type
define <8 x i1> @caller_retv8i1() #0 {
; X32-LABEL: caller_retv8i1:
; X32: # %bb.0: # %entry
; X32-NEXT: calll _test_retv8i1
; X32-NEXT: # kill: def $al killed $al def $eax
; X32-NEXT: kmovd %eax, %k0
; X32-NEXT: vpmovm2w %k0, %zmm0
; X32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; WIN64-LABEL: caller_retv8i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg 6
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: callq test_retv8i1
; WIN64-NEXT: # kill: def $al killed $al def $eax
; WIN64-NEXT: kmovd %eax, %k0
; WIN64-NEXT: vpmovm2w %k0, %zmm0
; WIN64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: vzeroupper
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv8i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: callq test_retv8i1
; LINUXOSX64-NEXT: # kill: def $al killed $al def $eax
; LINUXOSX64-NEXT: kmovd %eax, %k0
; LINUXOSX64-NEXT: vpmovm2w %k0, %zmm0
; LINUXOSX64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT: popq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: vzeroupper
; LINUXOSX64-NEXT: retq
entry:
%call = call x86_regcallcc <8 x i1> @test_retv8i1()