1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
2 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
3 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
4 ; Test that we only promote arguments when the caller/callee have compatible function attributes.
7 target triple = "x86_64-unknown-linux-gnu"
; Expected to promote: this callee and the caller defined right after it both
; use attribute group #0 (+avx512vl, min-legal-vector-width=512,
; prefer-vector-width=512).
; The body copies the <8 x i64> read through %arg1 into %arg. The autogenerated
; assertions expect %arg1 to become a by-value <8 x i64> parameter that is
; spilled to a private alloca inside the callee.
10 define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 {
12 ; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
13 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
14 ; CHECK-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
16 ; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
17 ; CHECK-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
18 ; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64
19 ; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
20 ; CHECK-NEXT: ret void
23 %tmp = load <8 x i64>, ptr %arg1
24 store <8 x i64> %tmp, ptr %arg
; Caller (attribute group #0) of the internal callee with the matching name.
; Zero-fills the first 32 bytes of %tmp, calls the callee with (%tmp2, %tmp),
; then copies the <8 x i64> held in %tmp2 out to %arg.
; In both pass configurations the assertions expect the call site to load
; %tmp and hand the vector to the callee by value.
28 define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg) #0 {
30 ; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
31 ; TUNIT-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512
32 ; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR0]] {
34 ; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
35 ; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
36 ; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5:[0-9]+]]
37 ; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
38 ; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6:[0-9]+]]
39 ; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
40 ; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
41 ; TUNIT-NEXT: ret void
43 ; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
44 ; CGSCC-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512
45 ; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR0]] {
47 ; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
48 ; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
49 ; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5:[0-9]+]]
50 ; CGSCC-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
51 ; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6:[0-9]+]]
52 ; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
53 ; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
54 ; CGSCC-NEXT: ret void
57 %tmp = alloca <8 x i64>, align 32
58 %tmp2 = alloca <8 x i64>, align 32
59 call void @llvm.memset.p0.i64(ptr align 32 %tmp, i8 0, i64 32, i1 false)
60 call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %tmp2, ptr %tmp)
61 %tmp4 = load <8 x i64>, ptr %tmp2, align 32
62 store <8 x i64> %tmp4, ptr %arg, align 2
; Expected to promote: this callee and the caller defined right after it both
; use attribute group #1 (+avx512vl, min-legal-vector-width=512,
; prefer-vector-width=256).
; The body copies the <8 x i64> read through %arg1 into %arg. The autogenerated
; assertions expect %arg1 to become a by-value <8 x i64> parameter that is
; spilled to a private alloca inside the callee.
67 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
69 ; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
70 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
71 ; CHECK-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] {
73 ; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
74 ; CHECK-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
75 ; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64
76 ; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
77 ; CHECK-NEXT: ret void
80 %tmp = load <8 x i64>, ptr %arg1
81 store <8 x i64> %tmp, ptr %arg
; Caller (attribute group #1) of the internal callee with the matching name.
; Zero-fills the first 32 bytes of %tmp, calls the callee with (%tmp2, %tmp),
; then copies the <8 x i64> held in %tmp2 out to %arg.
; In both pass configurations the assertions expect the call site to load
; %tmp and hand the vector to the callee by value.
85 define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg) #1 {
87 ; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
88 ; TUNIT-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256
89 ; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR1]] {
91 ; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
92 ; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
93 ; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
94 ; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
95 ; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
96 ; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
97 ; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
98 ; TUNIT-NEXT: ret void
100 ; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
101 ; CGSCC-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256
102 ; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR1]] {
104 ; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
105 ; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
106 ; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
107 ; CGSCC-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
108 ; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
109 ; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
110 ; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
111 ; CGSCC-NEXT: ret void
114 %tmp = alloca <8 x i64>, align 32
115 %tmp2 = alloca <8 x i64>, align 32
116 call void @llvm.memset.p0.i64(ptr align 32 %tmp, i8 0, i64 32, i1 false)
117 call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %tmp2, ptr %tmp)
118 %tmp4 = load <8 x i64>, ptr %tmp2, align 32
119 store <8 x i64> %tmp4, ptr %arg, align 2
123 ; This should promote: the callee (group #1) and its caller (group #0) both
; have +avx512vl and min-legal-vector-width=512; only prefer-vector-width
; differs (256 in the callee, 512 in the caller).
; The body copies the <8 x i64> read through %arg1 into %arg. The autogenerated
; assertions expect %arg1 to become a by-value <8 x i64> parameter that is
; spilled to a private alloca inside the callee.
124 define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
126 ; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
127 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
128 ; CHECK-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1]] {
130 ; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
131 ; CHECK-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
132 ; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64
133 ; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
134 ; CHECK-NEXT: ret void
137 %tmp = load <8 x i64>, ptr %arg1
138 store <8 x i64> %tmp, ptr %arg
; Caller (attribute group #0, prefer-vector-width=512) of the internal callee
; with the matching name, which uses group #1 (prefer-vector-width=256).
; Zero-fills the first 32 bytes of %tmp, calls the callee with (%tmp2, %tmp),
; then copies the <8 x i64> held in %tmp2 out to %arg.
; In both pass configurations the assertions expect the call site to load
; %tmp and hand the vector to the callee by value.
142 define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg) #0 {
144 ; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
145 ; TUNIT-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256
146 ; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR0]] {
148 ; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
149 ; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
150 ; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
151 ; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
152 ; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
153 ; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
154 ; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
155 ; TUNIT-NEXT: ret void
157 ; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
158 ; CGSCC-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256
159 ; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR0]] {
161 ; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
162 ; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
163 ; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
164 ; CGSCC-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
165 ; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
166 ; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
167 ; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
168 ; CGSCC-NEXT: ret void
171 %tmp = alloca <8 x i64>, align 32
172 %tmp2 = alloca <8 x i64>, align 32
173 call void @llvm.memset.p0.i64(ptr align 32 %tmp, i8 0, i64 32, i1 false)
174 call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %tmp2, ptr %tmp)
175 %tmp4 = load <8 x i64>, ptr %tmp2, align 32
176 store <8 x i64> %tmp4, ptr %arg, align 2
180 ; This should promote: the callee (group #0) and its caller (group #1) both
; have +avx512vl and min-legal-vector-width=512; only prefer-vector-width
; differs (512 in the callee, 256 in the caller).
; The body copies the <8 x i64> read through %arg1 into %arg. The autogenerated
; assertions expect %arg1 to become a by-value <8 x i64> parameter that is
; spilled to a private alloca inside the callee.
181 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 {
183 ; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
184 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
185 ; CHECK-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
187 ; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
188 ; CHECK-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
189 ; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64
190 ; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
191 ; CHECK-NEXT: ret void
194 %tmp = load <8 x i64>, ptr %arg1
195 store <8 x i64> %tmp, ptr %arg
; Caller (attribute group #1, prefer-vector-width=256) of the internal callee
; with the matching name, which uses group #0 (prefer-vector-width=512).
; Zero-fills the first 32 bytes of %tmp, calls the callee with (%tmp2, %tmp),
; then copies the <8 x i64> held in %tmp2 out to %arg.
; In both pass configurations the assertions expect the call site to load
; %tmp and hand the vector to the callee by value.
199 define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg) #1 {
201 ; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
202 ; TUNIT-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512
203 ; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR1]] {
205 ; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
206 ; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
207 ; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
208 ; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
209 ; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
210 ; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
211 ; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
212 ; TUNIT-NEXT: ret void
214 ; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
215 ; CGSCC-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512
216 ; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR1]] {
218 ; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
219 ; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
220 ; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
221 ; CGSCC-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
222 ; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
223 ; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
224 ; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
225 ; CGSCC-NEXT: ret void
228 %tmp = alloca <8 x i64>, align 32
229 %tmp2 = alloca <8 x i64>, align 32
230 call void @llvm.memset.p0.i64(ptr align 32 %tmp, i8 0, i64 32, i1 false)
231 call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %tmp2, ptr %tmp)
232 %tmp4 = load <8 x i64>, ptr %tmp2, align 32
233 store <8 x i64> %tmp4, ptr %arg, align 2
237 ; This should not promote: with +avx512vl on both sides, the callee (group
; #1) has min-legal-vector-width=512 while its caller (group #2) has 256.
; NOTE(review): presumably the mismatch means only one side treats 512-bit
; vectors as legal, so passing <8 x i64> by value would not be ABI-compatible;
; confirm against the X86 target hook that decides this.
; The body copies the <8 x i64> read through %arg1 into %arg. The autogenerated
; assertions expect %arg1 to stay a pointer parameter.
238 define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
240 ; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
241 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
242 ; CHECK-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) #[[ATTR1]] {
244 ; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
245 ; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
246 ; CHECK-NEXT: ret void
249 %tmp = load <8 x i64>, ptr %arg1
250 store <8 x i64> %tmp, ptr %arg
; Caller (attribute group #2, min-legal-vector-width=256) of the internal
; callee with the matching name, which uses group #1
; (min-legal-vector-width=512).
; Zero-fills the first 32 bytes of %tmp, calls the callee with (%tmp2, %tmp),
; then copies the <8 x i64> held in %tmp2 out to %arg.
; In both pass configurations the assertions expect %tmp to still be passed
; as a pointer, with no load inserted before the call.
254 define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg) #2 {
256 ; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
257 ; TUNIT-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256
258 ; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
260 ; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
261 ; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
262 ; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
263 ; TUNIT-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]]
264 ; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
265 ; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
266 ; TUNIT-NEXT: ret void
268 ; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
269 ; CGSCC-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256
270 ; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
272 ; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
273 ; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
274 ; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
275 ; CGSCC-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]]
276 ; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
277 ; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
278 ; CGSCC-NEXT: ret void
281 %tmp = alloca <8 x i64>, align 32
282 %tmp2 = alloca <8 x i64>, align 32
283 call void @llvm.memset.p0.i64(ptr align 32 %tmp, i8 0, i64 32, i1 false)
284 call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %tmp2, ptr %tmp)
285 %tmp4 = load <8 x i64>, ptr %tmp2, align 32
286 store <8 x i64> %tmp4, ptr %arg, align 2
290 ; This should not promote: with +avx512vl on both sides, the callee (group
; #2) has min-legal-vector-width=256 while its caller (group #1) has 512.
; NOTE(review): presumably the mismatch means only one side treats 512-bit
; vectors as legal, so passing <8 x i64> by value would not be ABI-compatible;
; confirm against the X86 target hook that decides this.
; The body copies the <8 x i64> read through %arg1 into %arg. The autogenerated
; assertions expect %arg1 to stay a pointer parameter.
291 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg, ptr readonly %arg1) #2 {
293 ; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
294 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
295 ; CHECK-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) #[[ATTR2:[0-9]+]] {
297 ; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
298 ; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
299 ; CHECK-NEXT: ret void
302 %tmp = load <8 x i64>, ptr %arg1
303 store <8 x i64> %tmp, ptr %arg
; Caller (attribute group #1, min-legal-vector-width=512) of the internal
; callee with the matching name, which uses group #2
; (min-legal-vector-width=256).
; Zero-fills the first 32 bytes of %tmp, calls the callee with (%tmp2, %tmp),
; then copies the <8 x i64> held in %tmp2 out to %arg.
; In both pass configurations the assertions expect %tmp to still be passed
; as a pointer, with no load inserted before the call.
307 define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg) #1 {
309 ; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
310 ; TUNIT-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256
311 ; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR1]] {
313 ; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
314 ; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
315 ; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
316 ; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]]
317 ; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
318 ; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
319 ; TUNIT-NEXT: ret void
321 ; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
322 ; CGSCC-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256
323 ; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR1]] {
325 ; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
326 ; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
327 ; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
328 ; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]]
329 ; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
330 ; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
331 ; CGSCC-NEXT: ret void
334 %tmp = alloca <8 x i64>, align 32
335 %tmp2 = alloca <8 x i64>, align 32
336 call void @llvm.memset.p0.i64(ptr align 32 %tmp, i8 0, i64 32, i1 false)
337 call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %tmp2, ptr %tmp)
338 %tmp4 = load <8 x i64>, ptr %tmp2, align 32
339 store <8 x i64> %tmp4, ptr %arg, align 2
343 ; This should promote: the callee (group #3, min-legal-vector-width=512) and
; its caller (group #4, min-legal-vector-width=256) both have only +avx2.
; NOTE(review): presumably the width mismatch is harmless here because neither
; side has AVX-512, so it does not change how <8 x i64> is passed; confirm.
; The body copies the <8 x i64> read through %arg1 into %arg. The autogenerated
; assertions expect %arg1 to become a by-value <8 x i64> parameter that is
; spilled to a private alloca inside the callee.
344 define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg, ptr readonly %arg1) #3 {
346 ; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
347 ; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
348 ; CHECK-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] {
350 ; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
351 ; CHECK-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
352 ; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64
353 ; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
354 ; CHECK-NEXT: ret void
357 %tmp = load <8 x i64>, ptr %arg1
358 store <8 x i64> %tmp, ptr %arg
; Caller (attribute group #4, +avx2, min-legal-vector-width=256) of the
; internal callee with the matching name, which uses group #3
; (+avx2, min-legal-vector-width=512).
; Zero-fills the first 32 bytes of %tmp, calls the callee with (%tmp2, %tmp),
; then copies the <8 x i64> held in %tmp2 out to %arg.
; In both pass configurations the assertions expect the call site to load
; %tmp and hand the vector to the callee by value. They also expect this
; function to end up in the same output attribute group (ATTR3) as its callee.
362 define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg) #4 {
364 ; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
365 ; TUNIT-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
366 ; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR3]] {
368 ; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
369 ; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
370 ; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
371 ; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
372 ; TUNIT-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
373 ; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
374 ; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
375 ; TUNIT-NEXT: ret void
377 ; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
378 ; CGSCC-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
379 ; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR3]] {
381 ; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
382 ; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
383 ; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
384 ; CGSCC-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
385 ; CGSCC-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
386 ; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
387 ; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
388 ; CGSCC-NEXT: ret void
391 %tmp = alloca <8 x i64>, align 32
392 %tmp2 = alloca <8 x i64>, align 32
393 call void @llvm.memset.p0.i64(ptr align 32 %tmp, i8 0, i64 32, i1 false)
394 call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %tmp2, ptr %tmp)
395 %tmp4 = load <8 x i64>, ptr %tmp2, align 32
396 store <8 x i64> %tmp4, ptr %arg, align 2
400 ; This should promote: the callee (group #4, min-legal-vector-width=256) and
; its caller (group #3, min-legal-vector-width=512) both have only +avx2.
; NOTE(review): presumably the width mismatch is harmless here because neither
; side has AVX-512, so it does not change how <8 x i64> is passed; confirm.
; The body copies the <8 x i64> read through %arg1 into %arg. The autogenerated
; assertions expect %arg1 to become a by-value <8 x i64> parameter that is
; spilled to a private alloca inside the callee. They also expect the callee
; to end up in output attribute group ATTR3 after the pass.
401 define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg, ptr readonly %arg1) #4 {
403 ; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
404 ; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
405 ; CHECK-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3]] {
407 ; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
408 ; CHECK-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
409 ; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64
410 ; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
411 ; CHECK-NEXT: ret void
414 %tmp = load <8 x i64>, ptr %arg1
415 store <8 x i64> %tmp, ptr %arg
; Caller (attribute group #3, +avx2, min-legal-vector-width=512) of the
; internal callee with the matching name, which uses group #4
; (+avx2, min-legal-vector-width=256).
; Zero-fills the first 32 bytes of %tmp, calls the callee with (%tmp2, %tmp),
; then copies the <8 x i64> held in %tmp2 out to %arg.
; In both pass configurations the assertions expect the call site to load
; %tmp and hand the vector to the callee by value.
419 define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg) #3 {
421 ; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
422 ; TUNIT-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256
423 ; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR3]] {
425 ; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
426 ; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
427 ; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
428 ; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
429 ; TUNIT-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
430 ; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
431 ; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
432 ; TUNIT-NEXT: ret void
434 ; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
435 ; CGSCC-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256
436 ; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR3]] {
438 ; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
439 ; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
440 ; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
441 ; CGSCC-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
442 ; CGSCC-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
443 ; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
444 ; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
445 ; CGSCC-NEXT: ret void
448 %tmp = alloca <8 x i64>, align 32
449 %tmp2 = alloca <8 x i64>, align 32
450 call void @llvm.memset.p0.i64(ptr align 32 %tmp, i8 0, i64 32, i1 false)
451 call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %tmp2, ptr %tmp)
452 %tmp4 = load <8 x i64>, ptr %tmp2, align 32
453 store <8 x i64> %tmp4, ptr %arg, align 2
457 ; Function Attrs: argmemonly nounwind
; Intrinsic the caller functions use to zero-fill the first 32 bytes of %tmp.
458 declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1) #5
; Input attribute groups. Groups #0-#4 share inlinehint/norecurse/nounwind/
; uwtable and differ only in the three string attributes under test:
;   #0: +avx512vl, min-legal-vector-width=512, prefer-vector-width=512
;   #1: +avx512vl, min-legal-vector-width=512, prefer-vector-width=256
;   #2: +avx512vl, min-legal-vector-width=256, prefer-vector-width=256
;   #3: +avx2,     min-legal-vector-width=512, prefer-vector-width=256
;   #4: +avx2,     min-legal-vector-width=256, prefer-vector-width=256
; Group #5 is used only by the memset declaration.
460 attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="512" }
461 attributes #1 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="256" }
462 attributes #2 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
463 attributes #3 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="512" "prefer-vector-width"="256" }
464 attributes #4 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
465 attributes #5 = { argmemonly nounwind }
467 ; TUNIT: attributes #[[ATTR0]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" }
468 ; TUNIT: attributes #[[ATTR1]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" }
469 ; TUNIT: attributes #[[ATTR2]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" }
470 ; TUNIT: attributes #[[ATTR3]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" }
471 ; TUNIT: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
472 ; TUNIT: attributes #[[ATTR5]] = { nofree willreturn memory(write) }
473 ; TUNIT: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn }
475 ; CGSCC: attributes #[[ATTR0]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" }
476 ; CGSCC: attributes #[[ATTR1]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" }
477 ; CGSCC: attributes #[[ATTR2]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" }
478 ; CGSCC: attributes #[[ATTR3]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" }
479 ; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
480 ; CGSCC: attributes #[[ATTR5]] = { nofree willreturn memory(write) }
481 ; CGSCC: attributes #[[ATTR6]] = { nofree nounwind willreturn }