1 // RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=i386-pc-win32 | FileCheck %s --check-prefix=X86
2 // RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=x86_64-pc-win32 | FileCheck %s --check-prefix=X64
4 void __vectorcall
v1(int a
, int b
) {}
5 // X86: define dso_local x86_vectorcallcc void @"\01v1@@8"(i32 inreg noundef %a, i32 inreg noundef %b)
6 // X64: define dso_local x86_vectorcallcc void @"\01v1@@16"(i32 noundef %a, i32 noundef %b)
8 void __vectorcall
v2(char a
, char b
) {}
9 // X86: define dso_local x86_vectorcallcc void @"\01v2@@8"(i8 inreg noundef signext %a, i8 inreg noundef signext %b)
10 // X64: define dso_local x86_vectorcallcc void @"\01v2@@16"(i8 noundef %a, i8 noundef %b)
// Single-int aggregate, used as the by-value struct parameter of v3.
struct Small {
  int x;
};
13 void __vectorcall
v3(int a
, struct Small b
, int c
) {}
14 // X86: define dso_local x86_vectorcallcc void @"\01v3@@12"(i32 inreg noundef %a, i32 %b.0, i32 inreg noundef %c)
15 // X64: define dso_local x86_vectorcallcc void @"\01v3@@24"(i32 noundef %a, i32 %b.coerce, i32 noundef %c)
// Five-int aggregate, used as the by-value struct parameter of v4.
struct Large {
  int a[5];
};
18 void __vectorcall
v4(int a
, struct Large b
, int c
) {}
19 // X86: define dso_local x86_vectorcallcc void @"\01v4@@28"(i32 inreg noundef %a, ptr noundef byval(%struct.Large) align 4 %b, i32 inreg noundef %c)
20 // X64: define dso_local x86_vectorcallcc void @"\01v4@@40"(i32 noundef %a, ptr noundef %b, i32 noundef %c)
22 void __vectorcall
v5(long long a
, int b
, int c
) {}
23 // X86: define dso_local x86_vectorcallcc void @"\01v5@@16"(i64 noundef %a, i32 inreg noundef %b, i32 inreg noundef %c)
24 // X64: define dso_local x86_vectorcallcc void @"\01v5@@24"(i64 noundef %a, i32 noundef %b, i32 noundef %c)
// Aggregate of two doubles.
struct HFA2 {
  double x;
  double y;
};
// Aggregate of four doubles.
struct HFA4 {
  double w;
  double x;
  double y;
  double z;
};
// Aggregate of five doubles.
struct HFA5 {
  double v;
  double w;
  double x;
  double y;
  double z;
};
30 void __vectorcall
hfa1(int a
, struct HFA4 b
, int c
) {}
31 // X86: define dso_local x86_vectorcallcc void @"\01hfa1@@40"(i32 inreg noundef %a, %struct.HFA4 inreg %b.coerce, i32 inreg noundef %c)
32 // X64: define dso_local x86_vectorcallcc void @"\01hfa1@@48"(i32 noundef %a, %struct.HFA4 inreg %b.coerce, i32 noundef %c)
// HFAs that would require more than six total SSE registers are passed
// indirectly. Additional vector arguments can consume the rest of the SSE
// registers.
37 void __vectorcall
hfa2(struct HFA4 a
, struct HFA4 b
, double c
) {}
38 // X86: define dso_local x86_vectorcallcc void @"\01hfa2@@72"(%struct.HFA4 inreg %a.coerce, ptr inreg noundef %b, double inreg noundef %c)
39 // X64: define dso_local x86_vectorcallcc void @"\01hfa2@@72"(%struct.HFA4 inreg %a.coerce, ptr noundef %b, double noundef %c)
41 // Ensure that we pass builtin types directly while counting them against the
42 // SSE register usage.
43 void __vectorcall
hfa3(double a
, double b
, double c
, double d
, double e
, struct HFA2 f
) {}
44 // X86: define dso_local x86_vectorcallcc void @"\01hfa3@@56"(double inreg noundef %a, double inreg noundef %b, double inreg noundef %c, double inreg noundef %d, double inreg noundef %e, ptr inreg noundef %f)
45 // X64: define dso_local x86_vectorcallcc void @"\01hfa3@@56"(double noundef %a, double noundef %b, double noundef %c, double noundef %d, double noundef %e, ptr noundef %f)
47 // Aggregates with more than four elements are not HFAs and are passed byval.
48 // Because they are not classified as homogeneous, they don't get special
49 // handling to ensure alignment.
50 void __vectorcall
hfa4(struct HFA5 a
) {}
51 // X86: define dso_local x86_vectorcallcc void @"\01hfa4@@40"(ptr noundef byval(%struct.HFA5) align 4 %0)
52 // X64: define dso_local x86_vectorcallcc void @"\01hfa4@@40"(ptr noundef %a)
54 // Return HFAs of 4 or fewer elements in registers.
55 static struct HFA2 g_hfa2
;
56 struct HFA2 __vectorcall
hfa5(void) { return g_hfa2
; }
57 // X86: define dso_local x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"()
58 // X64: define dso_local x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"()
// 16-byte vector of float (vector_size attribute).
typedef float v4f32 __attribute__((vector_size(16)));
61 struct HVA2
{ v4f32 x
, y
; };
62 struct HVA3
{ v4f32 w
, x
, y
; };
63 struct HVA4
{ v4f32 w
, x
, y
, z
; };
64 struct HVA5
{ v4f32 w
, x
, y
, z
, p
; };
66 v4f32 __vectorcall
hva1(int a
, struct HVA4 b
, int c
) {return b
.w
;}
67 // X86: define dso_local x86_vectorcallcc <4 x float> @"\01hva1@@72"(i32 inreg noundef %a, %struct.HVA4 inreg %b.coerce, i32 inreg noundef %c)
68 // X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva1@@80"(i32 noundef %a, %struct.HVA4 inreg %b.coerce, i32 noundef %c)
70 v4f32 __vectorcall
hva2(struct HVA4 a
, struct HVA4 b
, v4f32 c
) {return c
;}
71 // X86: define dso_local x86_vectorcallcc <4 x float> @"\01hva2@@144"(%struct.HVA4 inreg %a.coerce, ptr inreg noundef %b, <4 x float> inreg noundef %c)
72 // X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva2@@144"(%struct.HVA4 inreg %a.coerce, ptr noundef %b, <4 x float> noundef %c)
74 v4f32 __vectorcall
hva3(v4f32 a
, v4f32 b
, v4f32 c
, v4f32 d
, v4f32 e
, struct HVA2 f
) {return f
.x
;}
75 // X86: define dso_local x86_vectorcallcc <4 x float> @"\01hva3@@112"(<4 x float> inreg noundef %a, <4 x float> inreg noundef %b, <4 x float> inreg noundef %c, <4 x float> inreg noundef %d, <4 x float> inreg noundef %e, ptr inreg noundef %f)
76 // X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva3@@112"(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c, <4 x float> noundef %d, <4 x float> noundef %e, ptr noundef %f)
// Vector types have higher priority than HVA structures, so vector types are
// allocated first and HVAs are allocated if enough registers are available.
80 v4f32 __vectorcall
hva4(struct HVA4 a
, struct HVA2 b
, v4f32 c
) {return b
.y
;}
81 // X86: define dso_local x86_vectorcallcc <4 x float> @"\01hva4@@112"(%struct.HVA4 inreg %a.coerce, ptr inreg noundef %b, <4 x float> inreg noundef %c)
82 // X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva4@@112"(%struct.HVA4 inreg %a.coerce, ptr noundef %b, <4 x float> noundef %c)
84 v4f32 __vectorcall
hva5(struct HVA3 a
, struct HVA3 b
, v4f32 c
, struct HVA2 d
) {return d
.y
;}
85 // X86: define dso_local x86_vectorcallcc <4 x float> @"\01hva5@@144"(%struct.HVA3 inreg %a.coerce, ptr inreg noundef %b, <4 x float> inreg noundef %c, %struct.HVA2 inreg %d.coerce)
86 // X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva5@@144"(%struct.HVA3 inreg %a.coerce, ptr noundef %b, <4 x float> noundef %c, %struct.HVA2 inreg %d.coerce)
88 struct HVA4 __vectorcall
hva6(struct HVA4 a
, struct HVA4 b
) { return b
;}
89 // X86: define dso_local x86_vectorcallcc %struct.HVA4 @"\01hva6@@128"(%struct.HVA4 inreg %a.coerce, ptr inreg noundef %b)
90 // X64: define dso_local x86_vectorcallcc %struct.HVA4 @"\01hva6@@128"(%struct.HVA4 inreg %a.coerce, ptr noundef %b)
92 struct HVA5 __vectorcall
hva7(void) {struct HVA5 a
= {}; return a
;}
93 // X86: define dso_local x86_vectorcallcc void @"\01hva7@@0"(ptr inreg noalias sret(%struct.HVA5) align 16 %agg.result)
94 // X64: define dso_local x86_vectorcallcc void @"\01hva7@@0"(ptr noalias sret(%struct.HVA5) align 16 %agg.result)
96 v4f32 __vectorcall
hva8(v4f32 a
, v4f32 b
, v4f32 c
, v4f32 d
, int e
, v4f32 f
) {return f
;}
97 // X86: define dso_local x86_vectorcallcc <4 x float> @"\01hva8@@84"(<4 x float> inreg noundef %a, <4 x float> inreg noundef %b, <4 x float> inreg noundef %c, <4 x float> inreg noundef %d, i32 inreg noundef %e, <4 x float> inreg noundef %f)
98 // X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva8@@88"(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c, <4 x float> noundef %d, i32 noundef %e, <4 x float> noundef %f)
// Three-element float vector (ext_vector_type attribute).
typedef float v3f32 __attribute__((ext_vector_type(3)));
101 struct OddSizeHVA
{ v3f32 x
, y
; };
103 void __vectorcall
odd_size_hva(struct OddSizeHVA a
) {}
104 // X86: define dso_local x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce)
105 // X64: define dso_local x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce)
107 // The Vectorcall ABI only allows passing the first 6 items in registers in x64, so this shouldn't
108 // consider 'p7' as a register. Instead p5 gets put into the register on the second pass.
109 // x86 should pass p2, p6 and p7 in registers, then p1 in the second pass.
110 struct HFA2 __vectorcall
AddParticles(struct HFA2 p1
, float p2
, struct HFA4 p3
, int p4
, struct HFA2 p5
, float p6
, float p7
, int p8
){ return p1
;}
111 // X86: define dso_local x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@84"(%struct.HFA2 inreg %p1.coerce, float inreg noundef %p2, ptr inreg noundef %p3, i32 inreg noundef %p4, ptr noundef %p5, float inreg noundef %p6, float inreg noundef %p7, i32 noundef %p8)
112 // X64: define dso_local x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@104"(%struct.HFA2 inreg %p1.coerce, float noundef %p2, ptr noundef %p3, i32 noundef %p4, %struct.HFA2 inreg %p5.coerce, float noundef %p6, float noundef %p7, i32 noundef %p8)
// Vectorcall in both architectures allows passing of an HVA as long as there is room,
// even if it is not one of the first 6 arguments. First pass puts p4 into a
// register on both. p9 ends up in a register in x86 only. Second pass puts p1
// in a register, does NOT put p7 in a register (since there's no room), then puts
// p8 in a register.
119 void __vectorcall
HVAAnywhere(struct HFA2 p1
, int p2
, int p3
, float p4
, int p5
, int p6
, struct HFA4 p7
, struct HFA2 p8
, float p9
){}
120 // X86: define dso_local x86_vectorcallcc void @"\01HVAAnywhere@@88"(%struct.HFA2 inreg %p1.coerce, i32 inreg noundef %p2, i32 inreg noundef %p3, float inreg noundef %p4, i32 noundef %p5, i32 noundef %p6, ptr noundef %p7, %struct.HFA2 inreg %p8.coerce, float inreg noundef %p9)
121 // X64: define dso_local x86_vectorcallcc void @"\01HVAAnywhere@@112"(%struct.HFA2 inreg %p1.coerce, i32 noundef %p2, i32 noundef %p3, float noundef %p4, i32 noundef %p5, i32 noundef %p6, ptr noundef %p7, %struct.HFA2 inreg %p8.coerce, float noundef %p9)
124 // This covers the three ways XMM values can be passed on 32-bit x86:
125 // - directly in XMM register (xmm5)
126 // - indirectly by address, address in GPR (ecx)
127 // - indirectly by address, address on stack
128 void __vectorcall
vectorcall_indirect_vec(
129 double xmm0
, double xmm1
, double xmm2
, double xmm3
, double xmm4
,
130 v4f32 xmm5
, v4f32 ecx
, int edx
, v4f32 mem
) {
133 // X86: define dso_local x86_vectorcallcc void @"\01vectorcall_indirect_vec@@{{[0-9]+}}"
134 // X86-SAME: (double inreg noundef %xmm0,
135 // X86-SAME: double inreg noundef %xmm1,
136 // X86-SAME: double inreg noundef %xmm2,
137 // X86-SAME: double inreg noundef %xmm3,
138 // X86-SAME: double inreg noundef %xmm4,
139 // X86-SAME: <4 x float> inreg noundef %xmm5,
140 // X86-SAME: ptr inreg noundef %0,
141 // X86-SAME: i32 inreg noundef %edx,
142 // X86-SAME: ptr noundef %1)