1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f -O0 | FileCheck %s
4 define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6, float %f7, float %f8, float %f9, float %f10, float %f11, float %f12, float %f13, float %f14, float %f15, float %f16) #0 {
5 ; CHECK-LABEL: makefloat:
6 ; CHECK: # BB#0: # %entry
7 ; CHECK-NEXT: pushq %rbp
8 ; CHECK-NEXT: .cfi_def_cfa_offset 16
9 ; CHECK-NEXT: .cfi_offset %rbp, -16
10 ; CHECK-NEXT: movq %rsp, %rbp
11 ; CHECK-NEXT: .cfi_def_cfa_register %rbp
12 ; CHECK-NEXT: andq $-64, %rsp
13 ; CHECK-NEXT: subq $256, %rsp # imm = 0x100
14 ; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
15 ; CHECK-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero
16 ; CHECK-NEXT: vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero
17 ; CHECK-NEXT: vmovss {{.*#+}} xmm11 = mem[0],zero,zero,zero
18 ; CHECK-NEXT: vmovss {{.*#+}} xmm12 = mem[0],zero,zero,zero
19 ; CHECK-NEXT: vmovss {{.*#+}} xmm13 = mem[0],zero,zero,zero
20 ; CHECK-NEXT: vmovss {{.*#+}} xmm14 = mem[0],zero,zero,zero
21 ; CHECK-NEXT: vmovss {{.*#+}} xmm15 = mem[0],zero,zero,zero
22 ; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp)
23 ; CHECK-NEXT: vmovss %xmm1, {{[0-9]+}}(%rsp)
24 ; CHECK-NEXT: vmovss %xmm2, {{[0-9]+}}(%rsp)
25 ; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%rsp)
26 ; CHECK-NEXT: vmovss %xmm4, {{[0-9]+}}(%rsp)
27 ; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp)
28 ; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp)
29 ; CHECK-NEXT: vmovss %xmm7, {{[0-9]+}}(%rsp)
30 ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
31 ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
32 ; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
33 ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
34 ; CHECK-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
35 ; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
36 ; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero
37 ; CHECK-NEXT: vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero
38 ; CHECK-NEXT: vmovss {{.*#+}} xmm16 = mem[0],zero,zero,zero
39 ; CHECK-NEXT: vmovss {{.*#+}} xmm17 = mem[0],zero,zero,zero
40 ; CHECK-NEXT: vmovss {{.*#+}} xmm18 = mem[0],zero,zero,zero
41 ; CHECK-NEXT: vmovss {{.*#+}} xmm19 = mem[0],zero,zero,zero
42 ; CHECK-NEXT: vmovss {{.*#+}} xmm20 = mem[0],zero,zero,zero
43 ; CHECK-NEXT: vmovss {{.*#+}} xmm21 = mem[0],zero,zero,zero
44 ; CHECK-NEXT: vmovss {{.*#+}} xmm22 = mem[0],zero,zero,zero
45 ; CHECK-NEXT: vmovss {{.*#+}} xmm23 = mem[0],zero,zero,zero
46 ; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp)
47 ; CHECK-NEXT: vmovss %xmm1, {{[0-9]+}}(%rsp)
48 ; CHECK-NEXT: vmovss %xmm2, {{[0-9]+}}(%rsp)
49 ; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%rsp)
50 ; CHECK-NEXT: vmovss %xmm4, {{[0-9]+}}(%rsp)
51 ; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp)
52 ; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp)
53 ; CHECK-NEXT: vmovss %xmm7, {{[0-9]+}}(%rsp)
54 ; CHECK-NEXT: vmovss %xmm16, {{[0-9]+}}(%rsp)
55 ; CHECK-NEXT: vmovss %xmm17, {{[0-9]+}}(%rsp)
56 ; CHECK-NEXT: vmovss %xmm18, {{[0-9]+}}(%rsp)
57 ; CHECK-NEXT: vmovss %xmm19, {{[0-9]+}}(%rsp)
58 ; CHECK-NEXT: vmovss %xmm20, {{[0-9]+}}(%rsp)
59 ; CHECK-NEXT: vmovss %xmm21, {{[0-9]+}}(%rsp)
60 ; CHECK-NEXT: vmovss %xmm22, {{[0-9]+}}(%rsp)
61 ; CHECK-NEXT: vmovss %xmm23, {{[0-9]+}}(%rsp)
62 ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
63 ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
64 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
65 ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
66 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
67 ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
68 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
69 ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
70 ; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
71 ; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
72 ; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
73 ; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
74 ; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
75 ; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
76 ; CHECK-NEXT: # implicit-def: %ymm2
77 ; CHECK-NEXT: vmovaps %xmm1, %xmm2
78 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm2
79 ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
80 ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
81 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
82 ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
83 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
84 ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
85 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
86 ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
87 ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
88 ; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[2,3]
89 ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
90 ; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
91 ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
92 ; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm3[0]
93 ; CHECK-NEXT: # implicit-def: %ymm3
94 ; CHECK-NEXT: vmovaps %xmm1, %xmm3
95 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm3
96 ; CHECK-NEXT: # implicit-def: %zmm24
97 ; CHECK-NEXT: vmovaps %zmm3, %zmm24
98 ; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm24, %zmm24
99 ; CHECK-NEXT: vmovaps %zmm24, {{[0-9]+}}(%rsp)
100 ; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0
101 ; CHECK-NEXT: vmovss %xmm15, {{[0-9]+}}(%rsp) # 4-byte Spill
102 ; CHECK-NEXT: vmovss %xmm8, {{[0-9]+}}(%rsp) # 4-byte Spill
103 ; CHECK-NEXT: vmovss %xmm9, {{[0-9]+}}(%rsp) # 4-byte Spill
104 ; CHECK-NEXT: vmovss %xmm10, {{[0-9]+}}(%rsp) # 4-byte Spill
105 ; CHECK-NEXT: vmovss %xmm11, {{[0-9]+}}(%rsp) # 4-byte Spill
106 ; CHECK-NEXT: vmovss %xmm12, {{[0-9]+}}(%rsp) # 4-byte Spill
107 ; CHECK-NEXT: vmovss %xmm13, {{[0-9]+}}(%rsp) # 4-byte Spill
108 ; CHECK-NEXT: vmovss %xmm14, (%rsp) # 4-byte Spill
109 ; CHECK-NEXT: movq %rbp, %rsp
110 ; CHECK-NEXT: popq %rbp
113 %__A.addr.i = alloca float, align 4
114 %__B.addr.i = alloca float, align 4
115 %__C.addr.i = alloca float, align 4
116 %__D.addr.i = alloca float, align 4
117 %__E.addr.i = alloca float, align 4
118 %__F.addr.i = alloca float, align 4
119 %__G.addr.i = alloca float, align 4
120 %__H.addr.i = alloca float, align 4
121 %__I.addr.i = alloca float, align 4
122 %__J.addr.i = alloca float, align 4
123 %__K.addr.i = alloca float, align 4
124 %__L.addr.i = alloca float, align 4
125 %__M.addr.i = alloca float, align 4
126 %__N.addr.i = alloca float, align 4
127 %__O.addr.i = alloca float, align 4
128 %__P.addr.i = alloca float, align 4
129 %.compoundliteral.i = alloca <16 x float>, align 64
130 %f1.addr = alloca float, align 4
131 %f2.addr = alloca float, align 4
132 %f3.addr = alloca float, align 4
133 %f4.addr = alloca float, align 4
134 %f5.addr = alloca float, align 4
135 %f6.addr = alloca float, align 4
136 %f7.addr = alloca float, align 4
137 %f8.addr = alloca float, align 4
138 %f9.addr = alloca float, align 4
139 %f10.addr = alloca float, align 4
140 %f11.addr = alloca float, align 4
141 %f12.addr = alloca float, align 4
142 %f13.addr = alloca float, align 4
143 %f14.addr = alloca float, align 4
144 %f15.addr = alloca float, align 4
145 %f16.addr = alloca float, align 4
146 store float %f1, float* %f1.addr, align 4
147 store float %f2, float* %f2.addr, align 4
148 store float %f3, float* %f3.addr, align 4
149 store float %f4, float* %f4.addr, align 4
150 store float %f5, float* %f5.addr, align 4
151 store float %f6, float* %f6.addr, align 4
152 store float %f7, float* %f7.addr, align 4
153 store float %f8, float* %f8.addr, align 4
154 store float %f9, float* %f9.addr, align 4
155 store float %f10, float* %f10.addr, align 4
156 store float %f11, float* %f11.addr, align 4
157 store float %f12, float* %f12.addr, align 4
158 store float %f13, float* %f13.addr, align 4
159 store float %f14, float* %f14.addr, align 4
160 store float %f15, float* %f15.addr, align 4
161 store float %f16, float* %f16.addr, align 4
162 %0 = load float, float* %f16.addr, align 4
163 %1 = load float, float* %f15.addr, align 4
164 %2 = load float, float* %f14.addr, align 4
165 %3 = load float, float* %f13.addr, align 4
166 %4 = load float, float* %f12.addr, align 4
167 %5 = load float, float* %f11.addr, align 4
168 %6 = load float, float* %f10.addr, align 4
169 %7 = load float, float* %f9.addr, align 4
170 %8 = load float, float* %f8.addr, align 4
171 %9 = load float, float* %f7.addr, align 4
172 %10 = load float, float* %f6.addr, align 4
173 %11 = load float, float* %f5.addr, align 4
174 %12 = load float, float* %f4.addr, align 4
175 %13 = load float, float* %f3.addr, align 4
176 %14 = load float, float* %f2.addr, align 4
177 %15 = load float, float* %f1.addr, align 4
178 store float %0, float* %__A.addr.i, align 4
179 store float %1, float* %__B.addr.i, align 4
180 store float %2, float* %__C.addr.i, align 4
181 store float %3, float* %__D.addr.i, align 4
182 store float %4, float* %__E.addr.i, align 4
183 store float %5, float* %__F.addr.i, align 4
184 store float %6, float* %__G.addr.i, align 4
185 store float %7, float* %__H.addr.i, align 4
186 store float %8, float* %__I.addr.i, align 4
187 store float %9, float* %__J.addr.i, align 4
188 store float %10, float* %__K.addr.i, align 4
189 store float %11, float* %__L.addr.i, align 4
190 store float %12, float* %__M.addr.i, align 4
191 store float %13, float* %__N.addr.i, align 4
192 store float %14, float* %__O.addr.i, align 4
193 store float %15, float* %__P.addr.i, align 4
194 %16 = load float, float* %__P.addr.i, align 4
195 %vecinit.i = insertelement <16 x float> undef, float %16, i32 0
196 %17 = load float, float* %__O.addr.i, align 4
197 %vecinit1.i = insertelement <16 x float> %vecinit.i, float %17, i32 1
198 %18 = load float, float* %__N.addr.i, align 4
199 %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %18, i32 2
200 %19 = load float, float* %__M.addr.i, align 4
201 %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %19, i32 3
202 %20 = load float, float* %__L.addr.i, align 4
203 %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %20, i32 4
204 %21 = load float, float* %__K.addr.i, align 4
205 %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %21, i32 5
206 %22 = load float, float* %__J.addr.i, align 4
207 %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %22, i32 6
208 %23 = load float, float* %__I.addr.i, align 4
209 %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %23, i32 7
210 %24 = load float, float* %__H.addr.i, align 4
211 %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %24, i32 8
212 %25 = load float, float* %__G.addr.i, align 4
213 %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %25, i32 9
214 %26 = load float, float* %__F.addr.i, align 4
215 %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %26, i32 10
216 %27 = load float, float* %__E.addr.i, align 4
217 %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %27, i32 11
218 %28 = load float, float* %__D.addr.i, align 4
219 %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %28, i32 12
220 %29 = load float, float* %__C.addr.i, align 4
221 %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %29, i32 13
222 %30 = load float, float* %__B.addr.i, align 4
223 %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %30, i32 14
224 %31 = load float, float* %__A.addr.i, align 4
225 %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %31, i32 15
226 store <16 x float> %vecinit15.i, <16 x float>* %.compoundliteral.i, align 64
227 %32 = load <16 x float>, <16 x float>* %.compoundliteral.i, align 64