; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
; RUN: opt -S -argpromotion < %s | FileCheck %s
; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
; Test that we only promote arguments when the caller/callee have compatible
; function attributes.

target triple = "x86_64-unknown-linux-gnu"
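
; Argument promotion rewrites a by-reference argument into a by-value one of
; the pointee type: in the promoted callees below, (<8 x i64>* readonly %arg1)
; becomes a plain <8 x i64> value (ARG1_VAL in the CHECK lines) and the load
; moves out of the callee into each call site. Because <8 x i64> is a 512-bit
; vector, that rewrite is only done when the vector-width attributes of both
; caller and callee allow it, hence the attribute combinations exercised below.
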
; This should promote
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, <8 x i64>* %arg1
  store <8 x i64> %tmp, <8 x i64>* %arg
  ret void
}

define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg) #0 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  %tmp3 = bitcast <8 x i64>* %tmp to i8*
  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp)
  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
  ret void
}

; This should promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, <8 x i64>* %arg1
  store <8 x i64> %tmp, <8 x i64>* %arg
  ret void
}

define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #1 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  %tmp3 = bitcast <8 x i64>* %tmp to i8*
  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
  ret void
}

; This should promote
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, <8 x i64>* %arg1
  store <8 x i64> %tmp, <8 x i64>* %arg
  ret void
}

define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg) #0 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  %tmp3 = bitcast <8 x i64>* %tmp to i8*
  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
  ret void
}

; This should promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, <8 x i64>* %arg1
  store <8 x i64> %tmp, <8 x i64>* %arg
  ret void
}

define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg) #1 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  %tmp3 = bitcast <8 x i64>* %tmp to i8*
  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp)
  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
  ret void
}

; This should not promote
define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64>* readonly [[ARG1:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]]
; CHECK-NEXT:    store <8 x i64> [[TMP]], <8 x i64>* [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, <8 x i64>* %arg1
  store <8 x i64> %tmp, <8 x i64>* %arg
  ret void
}

define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #2 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64>* [[TMP]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  %tmp3 = bitcast <8 x i64>* %tmp to i8*
  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
  ret void
}

; This should not promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #2 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64>* readonly [[ARG1:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]]
; CHECK-NEXT:    store <8 x i64> [[TMP]], <8 x i64>* [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, <8 x i64>* %arg1
  store <8 x i64> %tmp, <8 x i64>* %arg
  ret void
}

define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg) #1 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* [[TMP2]], <8 x i64>* [[TMP]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  %tmp3 = bitcast <8 x i64>* %tmp to i8*
  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
  ret void
}

; This should promote
define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, <8 x i64>* %arg1
  store <8 x i64> %tmp, <8 x i64>* %arg
  ret void
}

define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg) #4 {
; CHECK-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
; CHECK-NEXT:    call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  %tmp3 = bitcast <8 x i64>* %tmp to i8*
  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
  ret void
}

; This should promote
define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load <8 x i64>, <8 x i64>* %arg1
  store <8 x i64> %tmp, <8 x i64>* %arg
  ret void
}

define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg) #3 {
; CHECK-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256
; CHECK-SAME: (<8 x i64>* [[ARG:%.*]])
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]]
; CHECK-NEXT:    call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT:    ret void
;
bb:
  %tmp = alloca <8 x i64>, align 32
  %tmp2 = alloca <8 x i64>, align 32
  %tmp3 = bitcast <8 x i64>* %tmp to i8*
  call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
  call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
  %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
  store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
  ret void
}

; If the arguments are scalar, it's OK to promote.
define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %X, i32* %Y) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
; CHECK-NEXT:    ret i32 [[C]]
;
  %A = load i32, i32* %X
  %B = load i32, i32* %Y
  %C = add i32 %A, %B
  ret i32 %C
}

define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %B) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256
; CHECK-SAME: (i32* [[B:%.*]])
; CHECK-NEXT:    [[A:%.*]] = alloca i32
; CHECK-NEXT:    store i32 1, i32* [[A]]
; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, i32* [[A]]
; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, i32* [[B]]
; CHECK-NEXT:    [[C:%.*]] = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
; CHECK-NEXT:    ret i32 [[C]]
;
  %A = alloca i32
  store i32 1, i32* %A
  %C = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %A, i32* %B)
  ret i32 %C
}

; If the arguments are scalar, it's OK to promote.
define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %X, i32* %Y) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
; CHECK-NEXT:    ret i32 [[C]]
;
  %A = load i32, i32* %X
  %B = load i32, i32* %Y
  %C = add i32 %A, %B
  ret i32 %C
}

define i32 @scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %B) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256
; CHECK-SAME: (i32* [[B:%.*]])
; CHECK-NEXT:    [[A:%.*]] = alloca i32
; CHECK-NEXT:    store i32 1, i32* [[A]]
; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, i32* [[A]]
; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, i32* [[B]]
; CHECK-NEXT:    [[C:%.*]] = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
; CHECK-NEXT:    ret i32 [[C]]
;
  %A = alloca i32
  store i32 1, i32* %A
  %C = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %A, i32* %B)
  ret i32 %C
}

; Function Attrs: argmemonly nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #5

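; Key to the attribute sets below: #0-#2 enable AVX512VL while #3-#4 only
; enable AVX2; "min-legal-vector-width" is 512 for #0, #1, and #3 and 256 for
; #2 and #4; "prefer-vector-width" is 512 for #0 and 256 for all the others.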
attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="512" }
attributes #1 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="256" }
attributes #2 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
attributes #3 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="512" "prefer-vector-width"="256" }
attributes #4 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
attributes #5 = { argmemonly nounwind }