//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"

using namespace llvm;

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
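
// Illustrative sketch (not taken verbatim from a test): a stale declaration
// such as
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// is renamed to "llvm.x86.sse41.ptestc.old" and a fresh declaration with the
// current <2 x i64> operand types is created; the call sites themselves are
// rewritten later during call upgrading.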

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
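
// Rough example of the mismatch this handles: an old declaration like
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
// carries an i32 immediate, while the current intrinsic takes an i8; the old
// declaration is set aside and re-created with the narrower operand.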

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
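
// Loose example of the signature change involved: the old
// llvm.x86.avx512.mask.cmp.pd.512 returned a scalar i8 bit-mask, whereas the
// current form returns an <8 x i1> mask; only the declaration is fixed up here.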

static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsics matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name == "addcarryx.u32" || // Added in 8.0
      Name == "addcarryx.u64" || // Added in 8.0
      Name == "addcarry.u32" || // Added in 8.0
      Name == "addcarry.u64" || // Added in 8.0
      Name == "subborrow.u32" || // Added in 8.0
      Name == "subborrow.u64" || // Added in 8.0
      Name.startswith("sse2.padds.") || // Added in 8.0
      Name.startswith("sse2.psubs.") || // Added in 8.0
      Name.startswith("sse2.paddus.") || // Added in 8.0
      Name.startswith("sse2.psubus.") || // Added in 8.0
      Name.startswith("avx2.padds.") || // Added in 8.0
      Name.startswith("avx2.psubs.") || // Added in 8.0
      Name.startswith("avx2.paddus.") || // Added in 8.0
      Name.startswith("avx2.psubus.") || // Added in 8.0
      Name.startswith("avx512.padds.") || // Added in 8.0
      Name.startswith("avx512.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.padds.") || // Added in 8.0
      Name.startswith("avx512.mask.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.paddus.") || // Added in 8.0
      Name.startswith("avx512.mask.psubus.") || // Added in 8.0
      Name == "ssse3.pabs.b.128" || // Added in 6.0
      Name == "ssse3.pabs.w.128" || // Added in 6.0
      Name == "ssse3.pabs.d.128" || // Added in 6.0
      Name.startswith("fma4.vfmadd.s") || // Added in 7.0
      Name.startswith("fma.vfmadd.") || // Added in 7.0
      Name.startswith("fma.vfmsub.") || // Added in 7.0
      Name.startswith("fma.vfmsubadd.") || // Added in 7.0
      Name.startswith("fma.vfnmadd.") || // Added in 7.0
      Name.startswith("fma.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
      Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
      Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
      Name.startswith("avx512.kunpck") || // Added in 6.0
      Name.startswith("avx2.pabs.") || // Added in 6.0
      Name.startswith("avx512.mask.pabs.") || // Added in 6.0
      Name.startswith("avx512.broadcastm") || // Added in 6.0
      Name == "sse.sqrt.ss" || // Added in 7.0
      Name == "sse2.sqrt.sd" || // Added in 7.0
      Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
      Name.startswith("avx.sqrt.p") || // Added in 7.0
      Name.startswith("sse2.sqrt.p") || // Added in 7.0
      Name.startswith("sse.sqrt.p") || // Added in 7.0
      Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
      Name.startswith("sse2.pcmpeq.") || // Added in 3.1
      Name.startswith("sse2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx2.pcmpeq.") || // Added in 3.1
      Name.startswith("avx2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
      Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
      Name.startswith("avx.vperm2f128.") || // Added in 6.0
      Name == "avx2.vperm2i128" || // Added in 6.0
      Name == "sse.add.ss" || // Added in 4.0
      Name == "sse2.add.sd" || // Added in 4.0
      Name == "sse.sub.ss" || // Added in 4.0
      Name == "sse2.sub.sd" || // Added in 4.0
      Name == "sse.mul.ss" || // Added in 4.0
      Name == "sse2.mul.sd" || // Added in 4.0
      Name == "sse.div.ss" || // Added in 4.0
      Name == "sse2.div.sd" || // Added in 4.0
      Name == "sse41.pmaxsb" || // Added in 3.9
      Name == "sse2.pmaxs.w" || // Added in 3.9
      Name == "sse41.pmaxsd" || // Added in 3.9
      Name == "sse2.pmaxu.b" || // Added in 3.9
      Name == "sse41.pmaxuw" || // Added in 3.9
      Name == "sse41.pmaxud" || // Added in 3.9
      Name == "sse41.pminsb" || // Added in 3.9
      Name == "sse2.pmins.w" || // Added in 3.9
      Name == "sse41.pminsd" || // Added in 3.9
      Name == "sse2.pminu.b" || // Added in 3.9
      Name == "sse41.pminuw" || // Added in 3.9
      Name == "sse41.pminud" || // Added in 3.9
      Name == "avx512.kand.w" || // Added in 7.0
      Name == "avx512.kandn.w" || // Added in 7.0
      Name == "avx512.knot.w" || // Added in 7.0
      Name == "avx512.kor.w" || // Added in 7.0
      Name == "avx512.kxor.w" || // Added in 7.0
      Name == "avx512.kxnor.w" || // Added in 7.0
      Name == "avx512.kortestc.w" || // Added in 7.0
      Name == "avx512.kortestz.w" || // Added in 7.0
      Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
      Name.startswith("avx2.pmax") || // Added in 3.9
      Name.startswith("avx2.pmin") || // Added in 3.9
      Name.startswith("avx512.mask.pmax") || // Added in 4.0
      Name.startswith("avx512.mask.pmin") || // Added in 4.0
      Name.startswith("avx2.vbroadcast") || // Added in 3.8
      Name.startswith("avx2.pbroadcast") || // Added in 3.8
      Name.startswith("avx.vpermil.") || // Added in 3.1
      Name.startswith("sse2.pshuf") || // Added in 3.9
      Name.startswith("avx512.pbroadcast") || // Added in 3.9
      Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
      Name.startswith("avx512.mask.movddup") || // Added in 3.9
      Name.startswith("avx512.mask.movshdup") || // Added in 3.9
      Name.startswith("avx512.mask.movsldup") || // Added in 3.9
      Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
      Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
      Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
      Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
      Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
      Name.startswith("avx512.mask.punpckl") || // Added in 3.9
      Name.startswith("avx512.mask.punpckh") || // Added in 3.9
      Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
      Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
      Name.startswith("avx512.mask.pand.") || // Added in 3.9
      Name.startswith("avx512.mask.pandn.") || // Added in 3.9
      Name.startswith("avx512.mask.por.") || // Added in 3.9
      Name.startswith("avx512.mask.pxor.") || // Added in 3.9
      Name.startswith("avx512.mask.and.") || // Added in 3.9
      Name.startswith("avx512.mask.andn.") || // Added in 3.9
      Name.startswith("avx512.mask.or.") || // Added in 3.9
      Name.startswith("avx512.mask.xor.") || // Added in 3.9
      Name.startswith("avx512.mask.padd.") || // Added in 4.0
      Name.startswith("avx512.mask.psub.") || // Added in 4.0
      Name.startswith("avx512.mask.pmull.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
      Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
      Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
      Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
      Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
      Name == "avx512.cvtusi2sd" || // Added in 7.0
      Name.startswith("avx512.mask.permvar.") || // Added in 7.0
      Name == "sse2.pmulu.dq" || // Added in 7.0
      Name == "sse41.pmuldq" || // Added in 7.0
      Name == "avx2.pmulu.dq" || // Added in 7.0
      Name == "avx2.pmul.dq" || // Added in 7.0
      Name == "avx512.pmulu.dq.512" || // Added in 7.0
      Name == "avx512.pmul.dq.512" || // Added in 7.0
      Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
      Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
      Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
      Name.startswith("avx512.cmp.p") || // Added in 12.0
      Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
      Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
      Name.startswith("avx512.mask.psll.d") || // Added in 4.0
      Name.startswith("avx512.mask.psll.q") || // Added in 4.0
      Name.startswith("avx512.mask.psll.w") || // Added in 4.0
      Name.startswith("avx512.mask.psra.d") || // Added in 4.0
      Name.startswith("avx512.mask.psra.q") || // Added in 4.0
      Name.startswith("avx512.mask.psra.w") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
      Name.startswith("avx512.mask.pslli") || // Added in 4.0
      Name.startswith("avx512.mask.psrai") || // Added in 4.0
      Name.startswith("avx512.mask.psrli") || // Added in 4.0
      Name.startswith("avx512.mask.psllv") || // Added in 4.0
      Name.startswith("avx512.mask.psrav") || // Added in 4.0
      Name.startswith("avx512.mask.psrlv") || // Added in 4.0
      Name.startswith("sse41.pmovsx") || // Added in 3.8
      Name.startswith("sse41.pmovzx") || // Added in 3.9
      Name.startswith("avx2.pmovsx") || // Added in 3.9
      Name.startswith("avx2.pmovzx") || // Added in 3.9
      Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
      Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
      Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
      Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
      Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
      Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.vpshld.") || // Added in 8.0
      Name.startswith("avx512.vpshrd.") || // Added in 8.0
      Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
      Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
      Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
      Name.startswith("avx512.mask.conflict.") || // Added in 9.0
      Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
      Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
      Name == "sse.cvtsi2ss" || // Added in 7.0
      Name == "sse.cvtsi642ss" || // Added in 7.0
      Name == "sse2.cvtsi2sd" || // Added in 7.0
      Name == "sse2.cvtsi642sd" || // Added in 7.0
      Name == "sse2.cvtss2sd" || // Added in 7.0
      Name == "sse2.cvtdq2pd" || // Added in 3.9
      Name == "sse2.cvtdq2ps" || // Added in 7.0
      Name == "sse2.cvtps2pd" || // Added in 3.9
      Name == "avx.cvtdq2.pd.256" || // Added in 3.9
      Name == "avx.cvtdq2.ps.256" || // Added in 7.0
      Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
      Name.startswith("vcvtph2ps.") || // Added in 11.0
      Name.startswith("avx.vinsertf128.") || // Added in 3.7
      Name == "avx2.vinserti128" || // Added in 3.7
      Name.startswith("avx512.mask.insert") || // Added in 4.0
      Name.startswith("avx.vextractf128.") || // Added in 3.7
      Name == "avx2.vextracti128" || // Added in 3.7
      Name.startswith("avx512.mask.vextract") || // Added in 4.0
      Name.startswith("sse4a.movnt.") || // Added in 3.9
      Name.startswith("avx.movnt.") || // Added in 3.2
      Name.startswith("avx512.storent.") || // Added in 3.9
      Name == "sse41.movntdqa" || // Added in 5.0
      Name == "avx2.movntdqa" || // Added in 5.0
      Name == "avx512.movntdqa" || // Added in 5.0
      Name == "sse2.storel.dq" || // Added in 3.9
      Name.startswith("sse.storeu.") || // Added in 3.9
      Name.startswith("sse2.storeu.") || // Added in 3.9
      Name.startswith("avx.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.store.p") || // Added in 3.9
      Name.startswith("avx512.mask.store.b.") || // Added in 3.9
      Name.startswith("avx512.mask.store.w.") || // Added in 3.9
      Name.startswith("avx512.mask.store.d.") || // Added in 3.9
      Name.startswith("avx512.mask.store.q.") || // Added in 3.9
      Name == "avx512.mask.store.ss" || // Added in 7.0
      Name.startswith("avx512.mask.loadu.") || // Added in 3.9
      Name.startswith("avx512.mask.load.") || // Added in 3.9
      Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
      Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
      Name.startswith("avx512.mask.expand.b") || // Added in 9.0
      Name.startswith("avx512.mask.expand.w") || // Added in 9.0
      Name.startswith("avx512.mask.expand.d") || // Added in 9.0
      Name.startswith("avx512.mask.expand.q") || // Added in 9.0
      Name.startswith("avx512.mask.expand.p") || // Added in 9.0
      Name.startswith("avx512.mask.compress.b") || // Added in 9.0
      Name.startswith("avx512.mask.compress.w") || // Added in 9.0
      Name.startswith("avx512.mask.compress.d") || // Added in 9.0
      Name.startswith("avx512.mask.compress.q") || // Added in 9.0
      Name.startswith("avx512.mask.compress.p") || // Added in 9.0
      Name == "sse42.crc32.64.8" || // Added in 3.4
      Name.startswith("avx.vbroadcast.s") || // Added in 3.5
      Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
      Name.startswith("avx512.mask.palignr.") || // Added in 3.9
      Name.startswith("avx512.mask.valign.") || // Added in 4.0
      Name.startswith("sse2.psll.dq") || // Added in 3.7
      Name.startswith("sse2.psrl.dq") || // Added in 3.7
      Name.startswith("avx2.psll.dq") || // Added in 3.7
      Name.startswith("avx2.psrl.dq") || // Added in 3.7
      Name.startswith("avx512.psll.dq") || // Added in 3.9
      Name.startswith("avx512.psrl.dq") || // Added in 3.9
      Name == "sse41.pblendw" || // Added in 3.7
      Name.startswith("sse41.blendp") || // Added in 3.7
      Name.startswith("avx.blend.p") || // Added in 3.7
      Name == "avx2.pblendw" || // Added in 3.7
      Name.startswith("avx2.pblendd.") || // Added in 3.7
      Name.startswith("avx.vbroadcastf128") || // Added in 4.0
      Name == "avx2.vbroadcasti128" || // Added in 3.7
      Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
      Name == "xop.vpcmov" || // Added in 3.8
      Name == "xop.vpcmov.256" || // Added in 5.0
      Name.startswith("avx512.mask.move.s") || // Added in 4.0
      Name.startswith("avx512.cvtmask2") || // Added in 5.0
      Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
      Name.startswith("xop.vprot") || // Added in 8.0
      Name.startswith("avx512.prol") || // Added in 8.0
      Name.startswith("avx512.pror") || // Added in 8.0
      Name.startswith("avx512.mask.prorv.") || // Added in 8.0
      Name.startswith("avx512.mask.pror.") || // Added in 8.0
      Name.startswith("avx512.mask.prolv.") || // Added in 8.0
      Name.startswith("avx512.mask.prol.") || // Added in 8.0
      Name.startswith("avx512.ptestm") || // Added in 6.0
      Name.startswith("avx512.ptestnm") || // Added in 6.0
      Name.startswith("avx512.mask.pavg")) // Added in 6.0
    return true;

  return false;
}

static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.startswith("x86."))
    return false;
  // Remove "x86." prefix.
  Name = Name.substr(4);

  if (ShouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_rdtscp);
    return true;
  }

  // SSE4.1 ptest functions may have an old signature.
  if (Name.startswith("sse41.ptest")) { // Added in 3.2
    if (Name.substr(11) == "c")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name.substr(11) == "z")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name.substr(11) == "nzc")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
  }
  // Several blend and other instructions with masks used the wrong number of
  // bits.
  if (Name == "sse41.insertps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                            NewFn);
  if (Name == "sse41.dppd") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                            NewFn);
  if (Name == "sse41.dpps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                            NewFn);
  if (Name == "sse41.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                            NewFn);
  if (Name == "avx.dp.ps.256") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                            NewFn);
  if (Name == "avx2.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                            NewFn);
  if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
                                     NewFn);
  if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
                                     NewFn);
  if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
                                     NewFn);
  if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
                                     NewFn);
  if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
                                     NewFn);
  if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
                                     NewFn);

  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
    return true;
  }
  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
    return true;
  }
  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
    auto Idx = F->getFunctionType()->getParamType(2);
    if (Idx->isFPOrFPVectorTy()) {
      rename(F);
      unsigned IdxSize = Idx->getPrimitiveSizeInBits();
      unsigned EltSize = Idx->getScalarSizeInBits();
      Intrinsic::ID Permil2ID;
      if (EltSize == 64 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd;
      else if (EltSize == 32 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2ps;
      else if (EltSize == 64 && IdxSize == 256)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
      else
        Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
      NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
      return true;
    }
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}
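
// Minimal sketch of the xop.vpermil2 fix-up above: a declaration whose third
// (selector) operand is still a floating-point vector, e.g.
//   declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>,
//                                                <4 x float>, i8)
// is re-declared against the current intrinsic, which expects an integer
// selector vector (<4 x i32> at this width). The element/index sizes computed
// above pick the matching pd/ps and 128/256-bit variant.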

static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."

  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("aarch64.neon.frintn")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("aarch64.neon.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vclz")) {
      Type *args[2] = {
          F->arg_begin()->getType(),
          Type::getInt1Ty(F->getContext())
      };
      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
      // the end of the name. Change name from llvm.arm.neon.vclz.* to
      //  llvm.ctlz.*
      FunctionType *fType = FunctionType::get(F->getReturnType(), args, false);
      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                               "llvm.ctlz." + Name.substr(14), F->getParent());
      return true;
    }
    if (Name.startswith("arm.neon.vcnt")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vldRegex.match(Name)) {
      auto fArgs = F->getFunctionType()->params();
      SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
      // Can't use Intrinsic::getDeclaration here as the return types might
      // then only be structurally equal.
      FunctionType *fType = FunctionType::get(F->getReturnType(), Tys, false);
      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                               "llvm." + Name + ".p0i8", F->getParent());
      return true;
    }
    static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vstRegex.match(Name)) {
      static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                                Intrinsic::arm_neon_vst2,
                                                Intrinsic::arm_neon_vst3,
                                                Intrinsic::arm_neon_vst4};

      static const Intrinsic::ID StoreLaneInts[] = {
          Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
          Intrinsic::arm_neon_vst4lane
      };

      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
      if (Name.find("lane") == StringRef::npos)
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreInts[fArgs.size() - 3], Tys);
      else
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreLaneInts[fArgs.size() - 5], Tys);
      return true;
    }
    if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
      return true;
    }
    if (Name.startswith("arm.neon.vqadds.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vqaddu.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vqsubs.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vqsubu.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("aarch64.neon.addp")) {
      if (F->arg_size() != 2)
        break; // Invalid IR.
      VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
      if (Ty && Ty->getElementType()->isFloatingPointTy()) {
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::aarch64_neon_faddp, Ty);
        return true;
      }
    }

    // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and v16i8
    // respectively.
    if ((Name.startswith("arm.neon.bfdot.") ||
         Name.startswith("aarch64.neon.bfdot.")) &&
        Name.endswith("i8")) {
      Intrinsic::ID IID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("arm.neon.bfdot.v2f32.v8i8",
                     "arm.neon.bfdot.v4f32.v16i8",
                     Intrinsic::arm_neon_bfdot)
              .Cases("aarch64.neon.bfdot.v2f32.v8i8",
                     "aarch64.neon.bfdot.v4f32.v16i8",
                     Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (IID == Intrinsic::not_intrinsic)
        break;

      size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
      assert((OperandWidth == 64 || OperandWidth == 128) &&
             "Unexpected operand width");
      LLVMContext &Ctx = F->getParent()->getContext();
      std::array<Type *, 2> Tys {{
          F->getReturnType(),
          FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)
      }};
      NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
      return true;
    }
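
    // Rough illustration of the bfdot operand change handled above: an older
    // module may declare
    //   declare <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float>,
    //                                                  <16 x i8>, <16 x i8>)
    // whereas since LLVM 12 the same operation takes bfloat vector operands,
    // so the declaration is re-created with <8 x bfloat> types of the width
    // computed from the return type.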

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore
    // and accept v8bf16 instead of v16i8
    if ((Name.startswith("arm.neon.bfm") ||
         Name.startswith("aarch64.neon.bfm")) &&
        Name.endswith(".v4f32.v16i8")) {
      Intrinsic::ID IID =
          StringSwitch<Intrinsic::ID>(Name)
              .Case("arm.neon.bfmmla.v4f32.v16i8",
                    Intrinsic::arm_neon_bfmmla)
              .Case("arm.neon.bfmlalb.v4f32.v16i8",
                    Intrinsic::arm_neon_bfmlalb)
              .Case("arm.neon.bfmlalt.v4f32.v16i8",
                    Intrinsic::arm_neon_bfmlalt)
              .Case("aarch64.neon.bfmmla.v4f32.v16i8",
                    Intrinsic::aarch64_neon_bfmmla)
              .Case("aarch64.neon.bfmlalb.v4f32.v16i8",
                    Intrinsic::aarch64_neon_bfmlalb)
              .Case("aarch64.neon.bfmlalt.v4f32.v16i8",
                    Intrinsic::aarch64_neon_bfmlalt)
              .Default(Intrinsic::not_intrinsic);
      if (IID == Intrinsic::not_intrinsic)
        break;

      std::array<Type *, 0> Tys;
      NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
      return true;
    }
    break;
  }

  case 'c': {
    if (Name.startswith("ctlz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("cttz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                        F->arg_begin()->getType());
      return true;
    }
    break;
  }
  case 'd': {
    if (Name == "dbg.value" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
      return true;
    }
    break;
  }
  case 'e': {
    SmallVector<StringRef, 2> Groups;
    static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
    if (R.match(Name, &Groups)) {
      Intrinsic::ID ID;
      ID = StringSwitch<Intrinsic::ID>(Groups[1])
               .Case("add", Intrinsic::vector_reduce_add)
               .Case("mul", Intrinsic::vector_reduce_mul)
               .Case("and", Intrinsic::vector_reduce_and)
               .Case("or", Intrinsic::vector_reduce_or)
               .Case("xor", Intrinsic::vector_reduce_xor)
               .Case("smax", Intrinsic::vector_reduce_smax)
               .Case("smin", Intrinsic::vector_reduce_smin)
               .Case("umax", Intrinsic::vector_reduce_umax)
               .Case("umin", Intrinsic::vector_reduce_umin)
               .Case("fmax", Intrinsic::vector_reduce_fmax)
               .Case("fmin", Intrinsic::vector_reduce_fmin)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        auto Args = F->getFunctionType()->params();
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, {Args[0]});
        return true;
      }
    }
    static const Regex R2(
        "^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+");
    Groups.clear();
    if (R2.match(Name, &Groups)) {
      Intrinsic::ID ID = Intrinsic::not_intrinsic;
      if (Groups[1] == "fadd")
        ID = Intrinsic::vector_reduce_fadd;
      if (Groups[1] == "fmul")
        ID = Intrinsic::vector_reduce_fmul;
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {Args[1]};
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
        return true;
      }
    }
    break;
  }
  case 'i':
  case 'l': {
    bool IsLifetimeStart = Name.startswith("lifetime.start");
    if (IsLifetimeStart || Name.startswith("invariant.start")) {
      Intrinsic::ID ID = IsLifetimeStart ?
                         Intrinsic::lifetime_start : Intrinsic::invariant_start;
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[1]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }

    bool IsLifetimeEnd = Name.startswith("lifetime.end");
    if (IsLifetimeEnd || Name.startswith("invariant.end")) {
      Intrinsic::ID ID = IsLifetimeEnd ?
                         Intrinsic::lifetime_end : Intrinsic::invariant_end;

      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }
    if (Name.startswith("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
          Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  }
  case 'm': {
    if (Name.startswith("masked.load.")) {
      Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->getName() !=
          Intrinsic::getName(Intrinsic::masked_load, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_load,
                                          Tys);
        return true;
      }
    }
    if (Name.startswith("masked.store.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = { Args[0], Args[1] };
      if (F->getName() !=
          Intrinsic::getName(Intrinsic::masked_store, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_store,
                                          Tys);
        return true;
      }
    }
    // Renaming gather/scatter intrinsics with no address space overloading
    // to the new overload which includes an address space
    if (Name.startswith("masked.gather.")) {
      Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->getName() !=
          Intrinsic::getName(Intrinsic::masked_gather, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_gather, Tys);
        return true;
      }
    }
    if (Name.startswith("masked.scatter.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {Args[0], Args[1]};
      if (F->getName() !=
          Intrinsic::getName(Intrinsic::masked_scatter, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_scatter, Tys);
        return true;
      }
    }
    // Updating the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to embedding the alignment as an attribute of
    // the pointer args.
    if (Name.startswith("memcpy.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, src, and len
      ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
                                        ParamTypes);
      return true;
    }
    if (Name.startswith("memmove.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, src, and len
      ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
                                        ParamTypes);
      return true;
    }
    if (Name.startswith("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
                                        ParamTypes);
      return true;
    }
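
    // Hedged illustration of the memcpy change handled above (LLVM 7.0-era
    // IR): the old five-operand form carried alignment as an i32 argument,
    //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n,
    //                                        i32 4, i1 false)
    // while the upgraded form drops that operand and expresses alignment as
    // parameter attributes on %d and %s instead.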
    break;
  }
  case 'n': {
    if (Name.startswith("nvvm.")) {
      Name = Name.substr(5);

      // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
      Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
                              .Cases("brev32", "brev64", Intrinsic::bitreverse)
                              .Case("clz.i", Intrinsic::ctlz)
                              .Case("popc.i", Intrinsic::ctpop)
                              .Default(Intrinsic::not_intrinsic);
      if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
                                          {F->getReturnType()});
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = StringSwitch<bool>(Name)
                        .Cases("abs.i", "abs.ll", true)
                        .Cases("clz.ll", "popc.ll", "h2f", true)
                        .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
                        .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
                        .StartsWith("atomic.load.add.f32.p", true)
                        .StartsWith("atomic.load.add.f64.p", true)
                        .Default(false);
      if (Expand) {
        NewFn = nullptr;
        return true;
      }
    }
    break;
  }
  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (Name.startswith("objectsize.")) {
      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->arg_size() == 2 || F->arg_size() == 3 ||
          F->getName() !=
              Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
                                          Tys);
        return true;
      }
    }
    break;

  case 'p':
    if (Name == "prefetch") {
      // Handle address space overloading.
      Type *Tys[] = {F->arg_begin()->getType()};
      if (F->getName() !=
          Intrinsic::getName(Intrinsic::prefetch, Tys, F->getParent())) {
        rename(F);
        NewFn =
            Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
        return true;
      }
    } else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::ptr_annotation,
                                        F->arg_begin()->getType());
      return true;
    }
    break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'v': {
    if (Name == "var.annotation" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::var_annotation);
      return true;
    }
    break;
  }
  }

  if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
    return true;

  // Remangle our intrinsic since we upgrade the mangling
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != None) {
    NewFn = Result.getValue();
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  NewFn = nullptr;
  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}

GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
                          GV->getName() == "llvm.global_dtors")) ||
      !GV->hasInitializer())
    return nullptr;
  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
  if (!ATy)
    return nullptr;
  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
  if (!STy || STy->getNumElements() != 2)
    return nullptr;

  LLVMContext &C = GV->getContext();
  IRBuilder<> IRB(C);
  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
                               IRB.getInt8PtrTy());
  Constant *Init = GV->getInitializer();
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
    auto Ctor = cast<Constant>(Init->getOperand(i));
    NewCtors[i] = ConstantStruct::get(
        EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
        Constant::getNullValue(IRB.getInt8PtrTy()));
  }
  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);

  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
                            NewInit, GV->getName());
}

// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
                                         Value *Op, unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
  llvm::VectorType *MaskTy = FixedVectorType::get(
      Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
  // i8 and we need to extract down to the right number of elements.
  if (NumElts <= 4) {
    int Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(
        Mask, Mask, makeArrayRef(Indices, NumElts), "extract");
  }

  return Mask;
}

static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
                            Value *Op0, Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getX86MaskVec(Builder, Mask,
                       cast<FixedVectorType>(Op0->getType())->getNumElements());
  return Builder.CreateSelect(Mask, Op0, Op1);
}

static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
                                  Value *Op0, Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
                                      Mask->getType()->getIntegerBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);
  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
  return Builder.CreateSelect(Mask, Op0, Op1);
}

// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  int Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),
                                             "palignr");

  return EmitX86Select(Builder, Mask, Align, Passthru);
}

static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *Ty = CI.getType();
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  else
    llvm_unreachable("Unexpected intrinsic");

  Value *Args[] = { CI.getArgOperand(0), CI.getArgOperand(1),
                    CI.getArgOperand(2) };

  // If this isn't index form we need to swap operand 0 and 1.
  if (!IndexForm)
    std::swap(Args[0], Args[1]);

  Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                                Args);
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
                             : Builder.CreateBitCast(CI.getArgOperand(1),
                                                     Ty);
  return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}

static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallInst &CI,
                                         Intrinsic::ID IID) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getOperand(0);
  Value *Op1 = CI.getOperand(1);
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});

  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
    Value *VecSrc = CI.getOperand(2);
    Value *Mask = CI.getOperand(3);
    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}

static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
                               bool IsRotateRight) {
  Type *Ty = CI.getType();
  Value *Src = CI.getArgOperand(0);
  Value *Amt = CI.getArgOperand(1);

  // Amount may be scalar immediate, in which case create a splat vector.
  // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  }

  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});

  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
    Value *VecSrc = CI.getOperand(2);
    Value *Mask = CI.getOperand(3);
    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}

static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
                              bool IsSigned) {
  Type *Ty = CI.getType();
  Value *LHS = CI.getArgOperand(0);
  Value *RHS = CI.getArgOperand(1);

  CmpInst::Predicate Pred;
  switch (Imm) {
  case 0x0:
    Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
    break;
  case 0x1:
    Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
    break;
  case 0x2:
    Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
    break;
  case 0x3:
    Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
    break;
  case 0x4:
    Pred = ICmpInst::ICMP_EQ;
    break;
  case 0x5:
    Pred = ICmpInst::ICMP_NE;
    break;
  case 0x6:
    return Constant::getNullValue(Ty); // FALSE
  case 0x7:
    return Constant::getAllOnesValue(Ty); // TRUE
  default:
    llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
  }

  Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
  Value *Ext = Builder.CreateSExt(Cmp, Ty);
  return Ext;
}

static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
                                    bool IsShiftRight, bool ZeroMask) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Amt = CI.getArgOperand(2);

  if (IsShiftRight)
    std::swap(Op0, Op1);

  // Amount may be scalar immediate, in which case create a splat vector.
  // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  }

  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});

  unsigned NumArgs = CI.getNumArgOperands();
  if (NumArgs >= 4) { // For masked intrinsics.
    Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
                    ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
                                   CI.getArgOperand(0);
    Value *Mask = CI.getOperand(NumArgs - 1);
    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}

static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
                                 Value *Ptr, Value *Data, Value *Mask,
                                 bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Data->getType()));
  const Align Alignment =
      Aligned
          ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedSize() / 8)
          : Align(1);

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Data, Ptr, Alignment);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
}

static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
                                Value *Ptr, Value *Passthru, Value *Mask,
                                bool Aligned) {
  Type *ValTy = Passthru->getType();
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
  const Align Alignment =
      Aligned
          ? Align(Passthru->getType()->getPrimitiveSizeInBits().getFixedSize() /
                  8)
          : Align(1);

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
}

static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getArgOperand(0);
  Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
  Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
  if (CI.getNumArgOperands() == 3)
    Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
  return Res;
}
static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
  Type *Ty = CI.getType();

  // Arguments have a vXi32 type so cast to vXi64.
  Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
  Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);

  if (IsSigned) {
    // Shift left then arithmetic shift right.
    Constant *ShiftAmt = ConstantInt::get(Ty, 32);
    LHS = Builder.CreateShl(LHS, ShiftAmt);
    LHS = Builder.CreateAShr(LHS, ShiftAmt);
    RHS = Builder.CreateShl(RHS, ShiftAmt);
    RHS = Builder.CreateAShr(RHS, ShiftAmt);
  } else {
    // Clear the upper bits.
    Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
    LHS = Builder.CreateAnd(LHS, Mask);
    RHS = Builder.CreateAnd(RHS, Mask);
  }

  Value *Res = Builder.CreateMul(LHS, RHS);

  if (CI.getNumArgOperands() == 4)
    Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));

  return Res;
}
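// Worked example of the widening above for the unsigned case: each vXi32
// operand is reinterpreted as vXi64 and its upper 32 bits are cleared with an
// AND of 0xffffffff, so the 64-bit multiply reproduces pmuludq; the signed
// pmuldq form instead sign-extends each 32-bit lane via shl/ashr by 32.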
// Apply a mask to a vector of i1s and make sure the result is at least 8 bits
// wide.
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask) {
  unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
  if (Mask) {
    const auto *C = dyn_cast<Constant>(Mask);
    if (!C || !C->isAllOnesValue())
      Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  }

  if (NumElts < 8) {
    int Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(Vec,
                                      Constant::getNullValue(Vec->getType()),
                                      Indices);
  }
  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}
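// Note on the shuffle above: when fewer than 8 elements are compared (e.g. a
// v4i1 or v2i1 result), the vector is padded with zero lanes so that the
// final bitcast always produces at least an i8, matching the minimum width of
// the original AVX-512 mask register results.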
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
                                   unsigned CC, bool Signed) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();

  Value *Cmp;
  if (CC == 3) {
    Cmp = Constant::getNullValue(
        FixedVectorType::get(Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    Cmp = Constant::getAllOnesValue(
        FixedVectorType::get(Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ;  break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE;  break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
  }

  Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);

  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
}
// Replace a masked intrinsic with an older unmasked intrinsic.
static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
                                    Intrinsic::ID IID) {
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
  Value *Rep = Builder.CreateCall(Intrin,
                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
  return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
}
static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
  Value* A = CI.getArgOperand(0);
  Value* B = CI.getArgOperand(1);
  Value* Src = CI.getArgOperand(2);
  Value* Mask = CI.getArgOperand(3);

  Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
  Value* Cmp = Builder.CreateIsNotNull(AndNode);
  Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
  Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
  Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
}
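// Sketch of the scalar masked-move rewrite above: only bit 0 of the i8 mask
// is honored, so the sequence selects between element 0 of B and element 0 of
// Src and reinserts the result into A, mirroring vmovss/vmovsd {k1} semantics.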
static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
  Value* Op = CI.getArgOperand(0);
  Type* ReturnOp = CI.getType();
  unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
}
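// Illustrative note: an integer mask feeding a vpmovm2*-style intrinsic is
// first expanded to a vector of i1 by getX86MaskVec and then sign-extended,
// so each set mask bit becomes an all-ones lane in the result vector.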
// Replace intrinsic with unmasked version and a select.
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
                                      CallInst &CI, Value *&Rep) {
  Name = Name.substr(12); // Remove avx512.mask.

  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (Name.startswith("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.startswith("permvar.")) {
    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("conflict.")) {
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pavg.")) {
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else
    return false;

  SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
                               CI.arg_operands().end());
  Args.pop_back();
  Args.pop_back();
  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                           Args);
  unsigned NumArgs = CI.getNumArgOperands();
  Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
                      CI.getArgOperand(NumArgs - 2));
  return true;
}
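// Illustrative before/after for this helper, assuming a 128-bit pshuf.b
// (hypothetical value names): a call like
//   %r = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %a,
//                          <16 x i8> %b, <16 x i8> %passthru, i16 %m)
// is rewritten roughly to
//   %u = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> %b)
//   %r = select <16 x i1> %mvec, <16 x i8> %u, <16 x i8> %passthru
// where %mvec is the mask vector materialized inside EmitX86Select.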
/// Upgrade comment in call to inline asm that represents an objc retain
/// release marker.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  size_t Pos;
  if (AsmStr->find("mov\tfp") == 0 &&
      AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
      (Pos = AsmStr->find("# marker")) != std::string::npos) {
    AsmStr->replace(Pos, 1, ";");
  }
}
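// For example (sketch; the exact string is produced by the frontend), an
// inline-asm marker of the form "mov\tfp, fp\t\t# marker for
// objc_retainAutoreleaseReturnValue" has its '#' rewritten to ';' so the
// marker survives assemblers that treat '#' as a comment introducer.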
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  Function *F = CI->getCalledFunction();
  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());

  assert(F && "Intrinsic call is not direct?");

  if (!NewFn) {
    // Get the Function's name.
    StringRef Name = F->getName();

    assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
    Name = Name.substr(5);

    bool IsX86 = Name.startswith("x86.");
    if (IsX86)
      Name = Name.substr(4);
    bool IsNVVM = Name.startswith("nvvm.");
    if (IsNVVM)
      Name = Name.substr(5);

    if (IsX86 && Name.startswith("sse4a.movnt.")) {
      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Nontemporal (unaligned) store of the 0'th element of the float/double
      // vector.
      Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
      PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
      Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
      Value *Extract =
          Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");

      StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }
    if (IsX86 && (Name.startswith("avx.movnt.") ||
                  Name.startswith("avx512.storent."))) {
      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Arg1->getType()),
                                        "cast");
      StoreInst *SI = Builder.CreateAlignedStore(
          Arg1, BC,
          Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }
    if (IsX86 && Name == "sse2.storel.dq") {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
      Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
      Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Elt->getType()),
                                        "cast");
      Builder.CreateAlignedStore(Elt, BC, Align(1));

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }
    if (IsX86 && (Name.startswith("sse.storeu.") ||
                  Name.startswith("sse2.storeu.") ||
                  Name.startswith("avx.storeu."))) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      Arg0 = Builder.CreateBitCast(Arg0,
                                   PointerType::getUnqual(Arg1->getType()),
                                   "cast");
      Builder.CreateAlignedStore(Arg1, Arg0, Align(1));

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }
    if (IsX86 && Name == "avx512.mask.store.ss") {
      Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
      UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
                         Mask, false);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }
    if (IsX86 && (Name.startswith("avx512.mask.store"))) {
      // "avx512.mask.storeu." or "avx512.mask.store."
      bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
      UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
                         CI->getArgOperand(2), Aligned);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }
    Value *Rep;
    // Upgrade packed integer vector compare intrinsics to compare instructions.
    if (IsX86 && (Name.startswith("sse2.pcmp") ||
                  Name.startswith("avx2.pcmp"))) {
      // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
      bool CmpEq = Name[9] == 'e';
      Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
                               CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
      Type *ExtTy = Type::getInt32Ty(C);
      if (CI->getOperand(0)->getType()->isIntegerTy(8))
        ExtTy = Type::getInt64Ty(C);
      unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
                         ExtTy->getPrimitiveSizeInBits();
      Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
      Rep = Builder.CreateVectorSplat(NumElts, Rep);
    } else if (IsX86 && (Name == "sse.sqrt.ss" ||
                         Name == "sse2.sqrt.sd")) {
      Value *Vec = CI->getArgOperand(0);
      Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
      Function *Intr = Intrinsic::getDeclaration(F->getParent(),
                                                 Intrinsic::sqrt,
                                                 Elt0->getType());
      Elt0 = Builder.CreateCall(Intr, Elt0);
      Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
    } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
                         Name.startswith("sse2.sqrt.p") ||
                         Name.startswith("sse.sqrt.p"))) {
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                         Intrinsic::sqrt,
                                                         CI->getType()),
                               {CI->getArgOperand(0)});
    } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
      if (CI->getNumArgOperands() == 4 &&
          (!isa<ConstantInt>(CI->getArgOperand(3)) ||
           cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
        Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
                                            : Intrinsic::x86_avx512_sqrt_pd_512;

        Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
        Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                           IID), Args);
      } else {
        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                           Intrinsic::sqrt,
                                                           CI->getType()),
                                 {CI->getArgOperand(0)});
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
                         Name.startswith("avx512.ptestnm"))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      Value *Mask = CI->getArgOperand(2);
      Rep = Builder.CreateAnd(Op0, Op1);
      llvm::Type *Ty = Op0->getType();
      Value *Zero = llvm::Constant::getNullValue(Ty);
      ICmpInst::Predicate Pred =
        Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      Rep = Builder.CreateICmp(Pred, Rep, Zero);
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
    } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))) {
      unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
                             ->getNumElements();
      Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
      unsigned NumElts = CI->getType()->getScalarSizeInBits();
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
      int Indices[64];
      for (unsigned i = 0; i != NumElts; ++i)
        Indices[i] = i;

      // First extract half of each vector. This gives better codegen than
      // doing it in a single shuffle.
      LHS = Builder.CreateShuffleVector(LHS, LHS,
                                        makeArrayRef(Indices, NumElts / 2));
      RHS = Builder.CreateShuffleVector(RHS, RHS,
                                        makeArrayRef(Indices, NumElts / 2));
      // Concat the vectors.
      // NOTE: Operands have to be swapped to match intrinsic definition.
      Rep = Builder.CreateShuffleVector(RHS, LHS,
                                        makeArrayRef(Indices, NumElts));
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kand.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      Rep = Builder.CreateAnd(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kandn.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      LHS = Builder.CreateNot(LHS);
      Rep = Builder.CreateAnd(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kor.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      Rep = Builder.CreateOr(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kxor.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      Rep = Builder.CreateXor(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kxnor.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      LHS = Builder.CreateNot(LHS);
      Rep = Builder.CreateXor(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.knot.w") {
      Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Rep = Builder.CreateNot(Rep);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 &&
               (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      Rep = Builder.CreateOr(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
      Value *C;
      if (Name[14] == 'c')
        C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
      else
        C = ConstantInt::getNullValue(Builder.getInt16Ty());
      Rep = Builder.CreateICmpEQ(Rep, C);
      Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
    } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
                         Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
                         Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
                         Name == "sse.div.ss" || Name == "sse2.div.sd")) {
      Type *I32Ty = Type::getInt32Ty(C);
      Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
                                                 ConstantInt::get(I32Ty, 0));
      Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
                                                 ConstantInt::get(I32Ty, 0));
      Value *EltOp;
      if (Name.contains(".add."))
        EltOp = Builder.CreateFAdd(Elt0, Elt1);
      else if (Name.contains(".sub."))
        EltOp = Builder.CreateFSub(Elt0, Elt1);
      else if (Name.contains(".mul."))
        EltOp = Builder.CreateFMul(Elt0, Elt1);
      else
        EltOp = Builder.CreateFDiv(Elt0, Elt1);
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
                                        ConstantInt::get(I32Ty, 0));
    } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
      // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
      bool CmpEq = Name[16] == 'e';
      Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
    } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
      Type *OpTy = CI->getArgOperand(0)->getType();
      unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
      Intrinsic::ID IID;
      switch (VecWidth) {
      default: llvm_unreachable("Unexpected intrinsic");
      case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
      case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
      case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
      }

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getOperand(0), CI->getArgOperand(1) });
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
      Type *OpTy = CI->getArgOperand(0)->getType();
      unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
      unsigned EltWidth = OpTy->getScalarSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_fpclass_ps_128;
      else if (VecWidth == 256 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_fpclass_ps_256;
      else if (VecWidth == 512 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_fpclass_ps_512;
      else if (VecWidth == 128 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_fpclass_pd_128;
      else if (VecWidth == 256 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_fpclass_pd_256;
      else if (VecWidth == 512 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_fpclass_pd_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getOperand(0), CI->getArgOperand(1) });
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.cmp.p")) {
      SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                   CI->arg_operands().end());
      Type *OpTy = Args[0]->getType();
      unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
      unsigned EltWidth = OpTy->getScalarSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
      else if (VecWidth == 256 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
      else if (VecWidth == 512 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
      else if (VecWidth == 128 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
      else if (VecWidth == 256 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
      else if (VecWidth == 512 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      Value *Mask = Constant::getAllOnesValue(CI->getType());
      if (VecWidth == 512)
        std::swap(Mask, Args.back());
      Args.push_back(Mask);

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               Args);
    } else if (IsX86 && Name.startswith("avx512.mask.cmp.")) {
      // Integer compare intrinsics.
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
    } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
    } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
                         Name.startswith("avx512.cvtw2mask.") ||
                         Name.startswith("avx512.cvtd2mask.") ||
                         Name.startswith("avx512.cvtq2mask."))) {
      Value *Op = CI->getArgOperand(0);
      Value *Zero = llvm::Constant::getNullValue(Op->getType());
      Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
    } else if (IsX86 && (Name == "ssse3.pabs.b.128" ||
                         Name == "ssse3.pabs.w.128" ||
                         Name == "ssse3.pabs.d.128" ||
                         Name.startswith("avx2.pabs") ||
                         Name.startswith("avx512.mask.pabs"))) {
      Rep = upgradeAbs(Builder, *CI);
    } else if (IsX86 && (Name == "sse41.pmaxsb" ||
                         Name == "sse2.pmaxs.w" ||
                         Name == "sse41.pmaxsd" ||
                         Name.startswith("avx2.pmaxs") ||
                         Name.startswith("avx512.mask.pmaxs"))) {
      Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
    } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
                         Name == "sse41.pmaxuw" ||
                         Name == "sse41.pmaxud" ||
                         Name.startswith("avx2.pmaxu") ||
                         Name.startswith("avx512.mask.pmaxu"))) {
      Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
    } else if (IsX86 && (Name == "sse41.pminsb" ||
                         Name == "sse2.pmins.w" ||
                         Name == "sse41.pminsd" ||
                         Name.startswith("avx2.pmins") ||
                         Name.startswith("avx512.mask.pmins"))) {
      Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
    } else if (IsX86 && (Name == "sse2.pminu.b" ||
                         Name == "sse41.pminuw" ||
                         Name == "sse41.pminud" ||
                         Name.startswith("avx2.pminu") ||
                         Name.startswith("avx512.mask.pminu"))) {
      Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
    } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
                         Name == "avx2.pmulu.dq" ||
                         Name == "avx512.pmulu.dq.512" ||
                         Name.startswith("avx512.mask.pmulu.dq."))) {
      Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
    } else if (IsX86 && (Name == "sse41.pmuldq" ||
                         Name == "avx2.pmul.dq" ||
                         Name == "avx512.pmul.dq.512" ||
                         Name.startswith("avx512.mask.pmul.dq."))) {
      Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
    } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
                         Name == "sse2.cvtsi2sd" ||
                         Name == "sse.cvtsi642ss" ||
                         Name == "sse2.cvtsi642sd")) {
      Rep = Builder.CreateSIToFP(
          CI->getArgOperand(1),
          cast<VectorType>(CI->getType())->getElementType());
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
    } else if (IsX86 && Name == "avx512.cvtusi2sd") {
      Rep = Builder.CreateUIToFP(
          CI->getArgOperand(1),
          cast<VectorType>(CI->getType())->getElementType());
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
    } else if (IsX86 && Name == "sse2.cvtss2sd") {
      Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
      Rep = Builder.CreateFPExt(
          Rep, cast<VectorType>(CI->getType())->getElementType());
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
    } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
                         Name == "sse2.cvtdq2ps" ||
                         Name == "avx.cvtdq2.pd.256" ||
                         Name == "avx.cvtdq2.ps.256" ||
                         Name.startswith("avx512.mask.cvtdq2pd.") ||
                         Name.startswith("avx512.mask.cvtudq2pd.") ||
                         Name.startswith("avx512.mask.cvtdq2ps.") ||
                         Name.startswith("avx512.mask.cvtudq2ps.") ||
                         Name.startswith("avx512.mask.cvtqq2pd.") ||
                         Name.startswith("avx512.mask.cvtuqq2pd.") ||
                         Name == "avx512.mask.cvtqq2ps.256" ||
                         Name == "avx512.mask.cvtqq2ps.512" ||
                         Name == "avx512.mask.cvtuqq2ps.256" ||
                         Name == "avx512.mask.cvtuqq2ps.512" ||
                         Name == "sse2.cvtps2pd" ||
                         Name == "avx.cvt.ps2.pd.256" ||
                         Name == "avx512.mask.cvtps2pd.128" ||
                         Name == "avx512.mask.cvtps2pd.256")) {
      auto *DstTy = cast<FixedVectorType>(CI->getType());
      Rep = CI->getArgOperand(0);
      auto *SrcTy = cast<FixedVectorType>(Rep->getType());

      unsigned NumDstElts = DstTy->getNumElements();
      if (NumDstElts < SrcTy->getNumElements()) {
        assert(NumDstElts == 2 && "Unexpected vector size");
        Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
      }

      bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
      bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
      if (IsPS2PD)
        Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
      else if (CI->getNumArgOperands() == 4 &&
               (!isa<ConstantInt>(CI->getArgOperand(3)) ||
                cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
        Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
                                       : Intrinsic::x86_avx512_sitofp_round;
        Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
                                                { DstTy, SrcTy });
        Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
      } else {
        Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
                         : Builder.CreateSIToFP(Rep, DstTy, "cvt");
      }

      if (CI->getNumArgOperands() >= 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
                         Name.startswith("vcvtph2ps."))) {
      auto *DstTy = cast<FixedVectorType>(CI->getType());
      Rep = CI->getArgOperand(0);
      auto *SrcTy = cast<FixedVectorType>(Rep->getType());
      unsigned NumDstElts = DstTy->getNumElements();
      if (NumDstElts != SrcTy->getNumElements()) {
        assert(NumDstElts == 4 && "Unexpected vector size");
        Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
      }
      Rep = Builder.CreateBitCast(
          Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
      Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
      if (CI->getNumArgOperands() >= 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
      Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
                              CI->getArgOperand(1), CI->getArgOperand(2),
                              /*Aligned*/false);
    } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
      Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
                              CI->getArgOperand(1), CI->getArgOperand(2),
                              /*Aligned*/true);
    } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
      auto *ResultTy = cast<FixedVectorType>(CI->getType());
      Type *PtrTy = ResultTy->getElementType();

      // Cast the pointer to element type.
      Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
                                         llvm::PointerType::getUnqual(PtrTy));

      Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                     ResultTy->getNumElements());

      Function *ELd = Intrinsic::getDeclaration(F->getParent(),
                                                Intrinsic::masked_expandload,
                                                ResultTy);
      Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
    } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
      auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
      Type *PtrTy = ResultTy->getElementType();

      // Cast the pointer to element type.
      Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
                                         llvm::PointerType::getUnqual(PtrTy));

      Value *MaskVec =
          getX86MaskVec(Builder, CI->getArgOperand(2),
                        cast<FixedVectorType>(ResultTy)->getNumElements());

      Function *CSt = Intrinsic::getDeclaration(F->getParent(),
                                                Intrinsic::masked_compressstore,
                                                ResultTy);
      Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
    } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
                         Name.startswith("avx512.mask.expand."))) {
      auto *ResultTy = cast<FixedVectorType>(CI->getType());

      Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                     ResultTy->getNumElements());

      bool IsCompress = Name[12] == 'c';
      Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
                                     : Intrinsic::x86_avx512_mask_expand;
      Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
      Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
                                       MaskVec });
    } else if (IsX86 && Name.startswith("xop.vpcom")) {
      bool IsSigned;
      if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
          Name.endswith("uq"))
        IsSigned = false;
      else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
               Name.endswith("q"))
        IsSigned = true;
      else
        llvm_unreachable("Unknown suffix");

      unsigned Imm;
      if (CI->getNumArgOperands() == 3) {
        Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      } else {
        Name = Name.substr(9); // strip off "xop.vpcom"
        if (Name.startswith("lt"))
          Imm = 0;
        else if (Name.startswith("le"))
          Imm = 1;
        else if (Name.startswith("gt"))
          Imm = 2;
        else if (Name.startswith("ge"))
          Imm = 3;
        else if (Name.startswith("eq"))
          Imm = 4;
        else if (Name.startswith("ne"))
          Imm = 5;
        else if (Name.startswith("false"))
          Imm = 6;
        else if (Name.startswith("true"))
          Imm = 7;
        else
          llvm_unreachable("Unknown condition");
      }

      Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
    } else if (IsX86 && Name.startswith("xop.vpcmov")) {
      Value *Sel = CI->getArgOperand(2);
      Value *NotSel = Builder.CreateNot(Sel);
      Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
      Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
      Rep = Builder.CreateOr(Sel0, Sel1);
    } else if (IsX86 && (Name.startswith("xop.vprot") ||
                         Name.startswith("avx512.prol") ||
                         Name.startswith("avx512.mask.prol"))) {
      Rep = upgradeX86Rotate(Builder, *CI, false);
    } else if (IsX86 && (Name.startswith("avx512.pror") ||
                         Name.startswith("avx512.mask.pror"))) {
      Rep = upgradeX86Rotate(Builder, *CI, true);
    } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
                         Name.startswith("avx512.mask.vpshld") ||
                         Name.startswith("avx512.maskz.vpshld"))) {
      bool ZeroMask = Name[11] == 'z';
      Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
    } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
                         Name.startswith("avx512.mask.vpshrd") ||
                         Name.startswith("avx512.maskz.vpshrd"))) {
      bool ZeroMask = Name[11] == 'z';
      Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
    } else if (IsX86 && Name == "sse42.crc32.64.8") {
      Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
                                               Intrinsic::x86_sse42_crc32_32_8);
      Value *Trunc0 =
          Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
      Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
      Rep = Builder.CreateZExt(Rep, CI->getType(), "");
    } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
                         Name.startswith("avx512.vbroadcast.s"))) {
      // Replace broadcasts with a series of insertelements.
      auto *VecTy = cast<FixedVectorType>(CI->getType());
      Type *EltTy = VecTy->getElementType();
      unsigned EltNum = VecTy->getNumElements();
      Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
                                          EltTy->getPointerTo());
      Value *Load = Builder.CreateLoad(EltTy, Cast);
      Type *I32Ty = Type::getInt32Ty(C);
      Rep = UndefValue::get(VecTy);
      for (unsigned I = 0; I < EltNum; ++I)
        Rep = Builder.CreateInsertElement(Rep, Load,
                                          ConstantInt::get(I32Ty, I));
    } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
                         Name.startswith("sse41.pmovzx") ||
                         Name.startswith("avx2.pmovsx") ||
                         Name.startswith("avx2.pmovzx") ||
                         Name.startswith("avx512.mask.pmovsx") ||
                         Name.startswith("avx512.mask.pmovzx"))) {
      auto *DstTy = cast<FixedVectorType>(CI->getType());
      unsigned NumDstElts = DstTy->getNumElements();

      // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
      SmallVector<int, 8> ShuffleMask(NumDstElts);
      for (unsigned i = 0; i != NumDstElts; ++i)
        ShuffleMask[i] = i;

      Value *SV =
          Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);

      bool DoSext = (StringRef::npos != Name.find("pmovsx"));
      Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
                   : Builder.CreateZExt(SV, DstTy);
      // If there are 3 arguments, it's a masked intrinsic so we need a select.
      if (CI->getNumArgOperands() == 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (Name == "avx512.mask.pmov.qd.256" ||
               Name == "avx512.mask.pmov.qd.512" ||
               Name == "avx512.mask.pmov.wb.256" ||
               Name == "avx512.mask.pmov.wb.512") {
      Type *Ty = CI->getArgOperand(1)->getType();
      Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
                         Name == "avx2.vbroadcasti128")) {
      // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
      Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
      unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
      auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
      Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                            PointerType::getUnqual(VT));
      Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
      if (NumSrcElts == 2)
        Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
      else
        Rep = Builder.CreateShuffleVector(
            Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
    } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
                         Name.startswith("avx512.mask.shuf.f"))) {
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Type *VT = CI->getType();
      unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
      unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
      unsigned ControlBitsMask = NumLanes - 1;
      unsigned NumControlBits = NumLanes / 2;
      SmallVector<int, 8> ShuffleMask(0);

      for (unsigned l = 0; l != NumLanes; ++l) {
        unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
        // We actually need the other source.
        if (l >= NumLanes / 2)
          LaneMask += NumLanes;
        for (unsigned i = 0; i != NumElementsInLane; ++i)
          ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
      }
      Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                        CI->getArgOperand(1), ShuffleMask);
      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
    } else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
                         Name.startswith("avx512.mask.broadcasti"))) {
      unsigned NumSrcElts =
          cast<FixedVectorType>(CI->getArgOperand(0)->getType())
              ->getNumElements();
      unsigned NumDstElts =
          cast<FixedVectorType>(CI->getType())->getNumElements();

      SmallVector<int, 8> ShuffleMask(NumDstElts);
      for (unsigned i = 0; i != NumDstElts; ++i)
        ShuffleMask[i] = i % NumSrcElts;

      Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                        CI->getArgOperand(0),
                                        ShuffleMask);
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
                         Name.startswith("avx2.vbroadcast") ||
                         Name.startswith("avx512.pbroadcast") ||
                         Name.startswith("avx512.mask.broadcast.s"))) {
      // Replace vp?broadcasts with a vector shuffle.
      Value *Op = CI->getArgOperand(0);
      ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
      Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
      SmallVector<int, 8> M;
      ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
      Rep = Builder.CreateShuffleVector(Op, M);

      if (CI->getNumArgOperands() == 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("sse2.padds.") ||
                         Name.startswith("avx2.padds.") ||
                         Name.startswith("avx512.padds.") ||
                         Name.startswith("avx512.mask.padds."))) {
      Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
    } else if (IsX86 && (Name.startswith("sse2.psubs.") ||
                         Name.startswith("avx2.psubs.") ||
                         Name.startswith("avx512.psubs.") ||
                         Name.startswith("avx512.mask.psubs."))) {
      Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
    } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
                         Name.startswith("avx2.paddus.") ||
                         Name.startswith("avx512.mask.paddus."))) {
      Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
    } else if (IsX86 && (Name.startswith("sse2.psubus.") ||
                         Name.startswith("avx2.psubus.") ||
                         Name.startswith("avx512.mask.psubus."))) {
      Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
    } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
                                      CI->getArgOperand(1),
                                      CI->getArgOperand(2),
                                      CI->getArgOperand(3),
                                      CI->getArgOperand(4),
                                      false);
    } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
                                      CI->getArgOperand(1),
                                      CI->getArgOperand(2),
                                      CI->getArgOperand(3),
                                      CI->getArgOperand(4),
                                      true);
    } else if (IsX86 && (Name == "sse2.psll.dq" ||
                         Name == "avx2.psll.dq")) {
      // 128/256-bit shift left specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (IsX86 && (Name == "sse2.psrl.dq" ||
                         Name == "avx2.psrl.dq")) {
      // 128/256-bit shift right specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
                         Name == "avx2.psll.dq.bs" ||
                         Name == "avx512.psll.dq.512")) {
      // 128/256/512-bit shift left specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
    } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
                         Name == "avx2.psrl.dq.bs" ||
                         Name == "avx512.psrl.dq.512")) {
      // 128/256/512-bit shift right specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
    } else if (IsX86 && (Name == "sse41.pblendw" ||
                         Name.startswith("sse41.blendp") ||
                         Name.startswith("avx.blend.p") ||
                         Name == "avx2.pblendw" ||
                         Name.startswith("avx2.pblendd."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      auto *VecTy = cast<FixedVectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
                         Name == "avx2.vinserti128" ||
                         Name.startswith("avx512.mask.insert"))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      unsigned DstNumElts =
          cast<FixedVectorType>(CI->getType())->getNumElements();
      unsigned SrcNumElts =
          cast<FixedVectorType>(Op1->getType())->getNumElements();
      unsigned Scale = DstNumElts / SrcNumElts;

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm % Scale;

      // Extend the second operand into a vector the size of the destination.
      SmallVector<int, 8> Idxs(DstNumElts);
      for (unsigned i = 0; i != SrcNumElts; ++i)
        Idxs[i] = i;
      for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
        Idxs[i] = SrcNumElts;
      Rep = Builder.CreateShuffleVector(Op1, Idxs);

      // Insert the second operand into the first operand.

      // Note that there is no guarantee that instruction lowering will actually
      // produce a vinsertf128 instruction for the created shuffles. In
      // particular, the 0 immediate case involves no lane changes, so it can
      // be handled as a blend.

      // Example of shuffle mask for 32-bit elements:
      // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
      // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >

      // First fill with identity mask.
      for (unsigned i = 0; i != DstNumElts; ++i)
        Idxs[i] = i;
      // Then replace the elements where we need to insert.
      for (unsigned i = 0; i != SrcNumElts; ++i)
        Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
      Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);

      // If the intrinsic has a mask operand, handle that.
      if (CI->getNumArgOperands() == 5)
        Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                            CI->getArgOperand(3));
    } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
                         Name == "avx2.vextracti128" ||
                         Name.startswith("avx512.mask.vextract"))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned DstNumElts =
          cast<FixedVectorType>(CI->getType())->getNumElements();
      unsigned SrcNumElts =
          cast<FixedVectorType>(Op0->getType())->getNumElements();
      unsigned Scale = SrcNumElts / DstNumElts;

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm % Scale;

      // Get indexes for the subvector of the input vector.
      SmallVector<int, 8> Idxs(DstNumElts);
      for (unsigned i = 0; i != DstNumElts; ++i) {
        Idxs[i] = i + (Imm * DstNumElts);
      }
      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      // If the intrinsic has a mask operand, handle that.
      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (!IsX86 && Name == "stackprotectorcheck") {
      Rep = nullptr;
    } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
                         Name.startswith("avx512.mask.perm.di."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      auto *VecTy = cast<FixedVectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<int, 8> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
                         Name == "avx2.vperm2i128")) {
      // The immediate permute control byte looks like this:
      // [1:0] - select 128 bits from sources for low half of destination
      // [2]   - ignore
      // [3]   - zero low half of destination
      // [5:4] - select 128 bits from sources for high half of destination
      // [6]   - ignore
      // [7]   - zero high half of destination

      uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();

      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
      unsigned HalfSize = NumElts / 2;
      SmallVector<int, 8> ShuffleMask(NumElts);

      // Determine which operand(s) are actually in use for this instruction.
      Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
      Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);

      // If needed, replace operands based on zero mask.
      V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
      V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;

      // Permute low half of result.
      unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
      for (unsigned i = 0; i < HalfSize; ++i)
        ShuffleMask[i] = StartIndex + i;

      // Permute high half of result.
      StartIndex = (Imm & 0x10) ? HalfSize : 0;
      for (unsigned i = 0; i < HalfSize; ++i)
        ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;

      Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
    } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
                         Name == "sse2.pshuf.d" ||
                         Name.startswith("avx512.mask.vpermil.p") ||
                         Name.startswith("avx512.mask.pshuf.d."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      auto *VecTy = cast<FixedVectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();
      // Calculate the size of each index in the immediate.
      unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
      unsigned IdxMask = ((1 << IdxSize) - 1);

      SmallVector<int, 8> Idxs(NumElts);
      // Lookup the bits for this element, wrapping around the immediate every
      // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
      // to offset by the first index of each group.
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && (Name == "sse2.pshufl.w" ||
                         Name.startswith("avx512.mask.pshufl.w."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
        for (unsigned i = 4; i != 8; ++i)
          Idxs[i + l] = i + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && (Name == "sse2.pshufh.w" ||
                         Name.startswith("avx512.mask.pshufh.w."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = i + l;
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
2783 } else if (IsX86
&& Name
.startswith("avx512.mask.shuf.p")) {
2784 Value
*Op0
= CI
->getArgOperand(0);
2785 Value
*Op1
= CI
->getArgOperand(1);
2786 unsigned Imm
= cast
<ConstantInt
>(CI
->getArgOperand(2))->getZExtValue();
2787 unsigned NumElts
= cast
<FixedVectorType
>(CI
->getType())->getNumElements();
2789 unsigned NumLaneElts
= 128/CI
->getType()->getScalarSizeInBits();
2790 unsigned HalfLaneElts
= NumLaneElts
/ 2;
2792 SmallVector
<int, 16> Idxs(NumElts
);
2793 for (unsigned i
= 0; i
!= NumElts
; ++i
) {
2794 // Base index is the starting element of the lane.
2795 Idxs
[i
] = i
- (i
% NumLaneElts
);
2796 // If we are half way through the lane switch to the other source.
2797 if ((i
% NumLaneElts
) >= HalfLaneElts
)
2799 // Now select the specific element. By adding HalfLaneElts bits from
2800 // the immediate. Wrapping around the immediate every 8-bits.
2801 Idxs
[i
] += (Imm
>> ((i
* HalfLaneElts
) % 8)) & ((1 << HalfLaneElts
) - 1);
2804 Rep
= Builder
.CreateShuffleVector(Op0
, Op1
, Idxs
);
2806 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(4), Rep
,
2807 CI
->getArgOperand(3));
    } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
                         Name.startswith("avx512.mask.movshdup") ||
                         Name.startswith("avx512.mask.movsldup"))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      unsigned Offset = 0;
      if (Name.startswith("avx512.mask.movshdup."))
        Offset = 1;

      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += NumLaneElts)
        for (unsigned i = 0; i != NumLaneElts; i += 2) {
          Idxs[i + l + 0] = i + l + Offset;
          Idxs[i + l + 1] = i + l + Offset;
        }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
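      // Illustrative mapping (added comment, values assumed): for a v4f32
      // operand, movsldup produces the mask <0, 0, 2, 2> (Offset == 0) and
      // movshdup produces <1, 1, 3, 3> (Offset == 1), duplicating the even or
      // odd elements of each 128-bit lane respectively.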
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
                         Name.startswith("avx512.mask.unpckl."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<int, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
                         Name.startswith("avx512.mask.unpckh."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<int, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
                         Name.startswith("avx512.mask.pand."))) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
                         Name.startswith("avx512.mask.pandn."))) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
      Rep = Builder.CreateAnd(Rep,
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
                         Name.startswith("avx512.mask.por."))) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
                         Name.startswith("avx512.mask.pxor."))) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
      Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
      Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
      Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
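      // Sketch of the expected lowering (added illustration; IR value names
      // are assumptions): @llvm.x86.avx512.mask.padd.d.512(%a, %b, %src, %mask)
      // becomes roughly
      //   %sum = add <16 x i32> %a, %b
      //   %m   = bitcast i16 %mask to <16 x i1>
      //   %res = select <16 x i1> %m, <16 x i32> %sum, <16 x i32> %src
      // with the select omitted by EmitX86Select when the mask is all-ones.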
    } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_add_ps_512;
        else
          IID = Intrinsic::x86_avx512_add_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      }

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_div_ps_512;
        else
          IID = Intrinsic::x86_avx512_div_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
      }

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_mul_ps_512;
        else
          IID = Intrinsic::x86_avx512_mul_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
      }

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_sub_ps_512;
        else
          IID = Intrinsic::x86_avx512_sub_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
      }

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
                         Name.startswith("avx512.mask.min.p")) &&
               Name.drop_front(18) == ".512") {
      bool IsDouble = Name[17] == 'd';
      bool IsMin = Name[13] == 'i';
      static const Intrinsic::ID MinMaxTbl[2][2] = {
        { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
        { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
      };
      Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1),
                                 CI->getArgOperand(4) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                         Intrinsic::ctlz,
                                                         CI->getType()),
                               { CI->getArgOperand(0), Builder.getInt1(false) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    0;

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
          IID = Intrinsic::x86_avx2_psllv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
          IID = Intrinsic::x86_avx2_psllv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
          IID = Intrinsic::x86_avx2_psllv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
          IID = Intrinsic::x86_avx2_psllv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
          IID = Intrinsic::x86_avx512_psllv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
          IID = Intrinsic::x86_avx512_psllv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
          IID = Intrinsic::x86_avx512_psllv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
                            : Intrinsic::x86_sse2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
                            : Intrinsic::x86_sse2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
                            : Intrinsic::x86_sse2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
                            : Intrinsic::x86_avx2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
                            : Intrinsic::x86_avx2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
                            : Intrinsic::x86_avx2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
                              Intrinsic::x86_avx512_psll_d_512;
        else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
                              Intrinsic::x86_avx512_psll_q_512;
        else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
                            : Intrinsic::x86_avx512_psll_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
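      // Dispatch examples for the name parsing above (added for illustration,
      // not normative): "avx512.mask.psll.d.128" selects
      // Intrinsic::x86_sse2_psll_d, while "avx512.mask.psll.qi.512" selects
      // Intrinsic::x86_avx512_pslli_q_512; UpgradeX86MaskedShift then emits
      // the shift followed by the masked select.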
3066 } else if (IsX86
&& Name
.startswith("avx512.mask.psrl")) {
3067 bool IsImmediate
= Name
[16] == 'i' ||
3068 (Name
.size() > 18 && Name
[18] == 'i');
3069 bool IsVariable
= Name
[16] == 'v';
3070 char Size
= Name
[16] == '.' ? Name
[17] :
3071 Name
[17] == '.' ? Name
[18] :
3072 Name
[18] == '.' ? Name
[19] :
3076 if (IsVariable
&& Name
[17] != '.') {
3077 if (Size
== 'd' && Name
[17] == '2') // avx512.mask.psrlv2.di
3078 IID
= Intrinsic::x86_avx2_psrlv_q
;
3079 else if (Size
== 'd' && Name
[17] == '4') // avx512.mask.psrlv4.di
3080 IID
= Intrinsic::x86_avx2_psrlv_q_256
;
3081 else if (Size
== 's' && Name
[17] == '4') // avx512.mask.psrlv4.si
3082 IID
= Intrinsic::x86_avx2_psrlv_d
;
3083 else if (Size
== 's' && Name
[17] == '8') // avx512.mask.psrlv8.si
3084 IID
= Intrinsic::x86_avx2_psrlv_d_256
;
3085 else if (Size
== 'h' && Name
[17] == '8') // avx512.mask.psrlv8.hi
3086 IID
= Intrinsic::x86_avx512_psrlv_w_128
;
3087 else if (Size
== 'h' && Name
[17] == '1') // avx512.mask.psrlv16.hi
3088 IID
= Intrinsic::x86_avx512_psrlv_w_256
;
3089 else if (Name
[17] == '3' && Name
[18] == '2') // avx512.mask.psrlv32hi
3090 IID
= Intrinsic::x86_avx512_psrlv_w_512
;
3092 llvm_unreachable("Unexpected size");
3093 } else if (Name
.endswith(".128")) {
3094 if (Size
== 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3095 IID
= IsImmediate
? Intrinsic::x86_sse2_psrli_d
3096 : Intrinsic::x86_sse2_psrl_d
;
3097 else if (Size
== 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3098 IID
= IsImmediate
? Intrinsic::x86_sse2_psrli_q
3099 : Intrinsic::x86_sse2_psrl_q
;
3100 else if (Size
== 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3101 IID
= IsImmediate
? Intrinsic::x86_sse2_psrli_w
3102 : Intrinsic::x86_sse2_psrl_w
;
3104 llvm_unreachable("Unexpected size");
3105 } else if (Name
.endswith(".256")) {
3106 if (Size
== 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3107 IID
= IsImmediate
? Intrinsic::x86_avx2_psrli_d
3108 : Intrinsic::x86_avx2_psrl_d
;
3109 else if (Size
== 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3110 IID
= IsImmediate
? Intrinsic::x86_avx2_psrli_q
3111 : Intrinsic::x86_avx2_psrl_q
;
3112 else if (Size
== 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3113 IID
= IsImmediate
? Intrinsic::x86_avx2_psrli_w
3114 : Intrinsic::x86_avx2_psrl_w
;
3116 llvm_unreachable("Unexpected size");
3118 if (Size
== 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3119 IID
= IsImmediate
? Intrinsic::x86_avx512_psrli_d_512
:
3120 IsVariable
? Intrinsic::x86_avx512_psrlv_d_512
:
3121 Intrinsic::x86_avx512_psrl_d_512
;
3122 else if (Size
== 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3123 IID
= IsImmediate
? Intrinsic::x86_avx512_psrli_q_512
:
3124 IsVariable
? Intrinsic::x86_avx512_psrlv_q_512
:
3125 Intrinsic::x86_avx512_psrl_q_512
;
3126 else if (Size
== 'w') // psrl.wi.512, psrli.w, psrl.w)
3127 IID
= IsImmediate
? Intrinsic::x86_avx512_psrli_w_512
3128 : Intrinsic::x86_avx512_psrl_w_512
;
3130 llvm_unreachable("Unexpected size");
3133 Rep
= UpgradeX86MaskedShift(Builder
, *CI
, IID
);
3134 } else if (IsX86
&& Name
.startswith("avx512.mask.psra")) {
3135 bool IsImmediate
= Name
[16] == 'i' ||
3136 (Name
.size() > 18 && Name
[18] == 'i');
3137 bool IsVariable
= Name
[16] == 'v';
3138 char Size
= Name
[16] == '.' ? Name
[17] :
3139 Name
[17] == '.' ? Name
[18] :
3140 Name
[18] == '.' ? Name
[19] :
3144 if (IsVariable
&& Name
[17] != '.') {
3145 if (Size
== 's' && Name
[17] == '4') // avx512.mask.psrav4.si
3146 IID
= Intrinsic::x86_avx2_psrav_d
;
3147 else if (Size
== 's' && Name
[17] == '8') // avx512.mask.psrav8.si
3148 IID
= Intrinsic::x86_avx2_psrav_d_256
;
3149 else if (Size
== 'h' && Name
[17] == '8') // avx512.mask.psrav8.hi
3150 IID
= Intrinsic::x86_avx512_psrav_w_128
;
3151 else if (Size
== 'h' && Name
[17] == '1') // avx512.mask.psrav16.hi
3152 IID
= Intrinsic::x86_avx512_psrav_w_256
;
3153 else if (Name
[17] == '3' && Name
[18] == '2') // avx512.mask.psrav32hi
3154 IID
= Intrinsic::x86_avx512_psrav_w_512
;
3156 llvm_unreachable("Unexpected size");
3157 } else if (Name
.endswith(".128")) {
3158 if (Size
== 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3159 IID
= IsImmediate
? Intrinsic::x86_sse2_psrai_d
3160 : Intrinsic::x86_sse2_psra_d
;
3161 else if (Size
== 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3162 IID
= IsImmediate
? Intrinsic::x86_avx512_psrai_q_128
:
3163 IsVariable
? Intrinsic::x86_avx512_psrav_q_128
:
3164 Intrinsic::x86_avx512_psra_q_128
;
3165 else if (Size
== 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3166 IID
= IsImmediate
? Intrinsic::x86_sse2_psrai_w
3167 : Intrinsic::x86_sse2_psra_w
;
3169 llvm_unreachable("Unexpected size");
3170 } else if (Name
.endswith(".256")) {
3171 if (Size
== 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3172 IID
= IsImmediate
? Intrinsic::x86_avx2_psrai_d
3173 : Intrinsic::x86_avx2_psra_d
;
3174 else if (Size
== 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3175 IID
= IsImmediate
? Intrinsic::x86_avx512_psrai_q_256
:
3176 IsVariable
? Intrinsic::x86_avx512_psrav_q_256
:
3177 Intrinsic::x86_avx512_psra_q_256
;
3178 else if (Size
== 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3179 IID
= IsImmediate
? Intrinsic::x86_avx2_psrai_w
3180 : Intrinsic::x86_avx2_psra_w
;
3182 llvm_unreachable("Unexpected size");
3184 if (Size
== 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3185 IID
= IsImmediate
? Intrinsic::x86_avx512_psrai_d_512
:
3186 IsVariable
? Intrinsic::x86_avx512_psrav_d_512
:
3187 Intrinsic::x86_avx512_psra_d_512
;
3188 else if (Size
== 'q') // psra.qi.512, psrai.q, psra.q
3189 IID
= IsImmediate
? Intrinsic::x86_avx512_psrai_q_512
:
3190 IsVariable
? Intrinsic::x86_avx512_psrav_q_512
:
3191 Intrinsic::x86_avx512_psra_q_512
;
3192 else if (Size
== 'w') // psra.wi.512, psrai.w, psra.w
3193 IID
= IsImmediate
? Intrinsic::x86_avx512_psrai_w_512
3194 : Intrinsic::x86_avx512_psra_w_512
;
3196 llvm_unreachable("Unexpected size");
3199 Rep
= UpgradeX86MaskedShift(Builder
, *CI
, IID
);
    } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
      Rep = upgradeMaskedMove(Builder, *CI);
    } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
      Rep = UpgradeMaskToInt(Builder, *CI);
    } else if (IsX86 && Name.endswith(".movntdqa")) {
      Module *M = F->getParent();
      MDNode *Node = MDNode::get(
          C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));

      Value *Ptr = CI->getArgOperand(0);

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC = Builder.CreateBitCast(
          Ptr, PointerType::getUnqual(CI->getType()), "cast");
      LoadInst *LI = Builder.CreateAlignedLoad(
          CI->getType(), BC,
          Align(CI->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
      LI->setMetadata(M->getMDKindID("nontemporal"), Node);
      Rep = LI;
3219 } else if (IsX86
&& (Name
.startswith("fma.vfmadd.") ||
3220 Name
.startswith("fma.vfmsub.") ||
3221 Name
.startswith("fma.vfnmadd.") ||
3222 Name
.startswith("fma.vfnmsub."))) {
3223 bool NegMul
= Name
[6] == 'n';
3224 bool NegAcc
= NegMul
? Name
[8] == 's' : Name
[7] == 's';
3225 bool IsScalar
= NegMul
? Name
[12] == 's' : Name
[11] == 's';
3227 Value
*Ops
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3228 CI
->getArgOperand(2) };
3231 Ops
[0] = Builder
.CreateExtractElement(Ops
[0], (uint64_t)0);
3232 Ops
[1] = Builder
.CreateExtractElement(Ops
[1], (uint64_t)0);
3233 Ops
[2] = Builder
.CreateExtractElement(Ops
[2], (uint64_t)0);
3236 if (NegMul
&& !IsScalar
)
3237 Ops
[0] = Builder
.CreateFNeg(Ops
[0]);
3238 if (NegMul
&& IsScalar
)
3239 Ops
[1] = Builder
.CreateFNeg(Ops
[1]);
3241 Ops
[2] = Builder
.CreateFNeg(Ops
[2]);
3243 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(),
3249 Rep
= Builder
.CreateInsertElement(CI
->getArgOperand(0), Rep
,
3251 } else if (IsX86
&& Name
.startswith("fma4.vfmadd.s")) {
3252 Value
*Ops
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3253 CI
->getArgOperand(2) };
3255 Ops
[0] = Builder
.CreateExtractElement(Ops
[0], (uint64_t)0);
3256 Ops
[1] = Builder
.CreateExtractElement(Ops
[1], (uint64_t)0);
3257 Ops
[2] = Builder
.CreateExtractElement(Ops
[2], (uint64_t)0);
3259 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(),
3264 Rep
= Builder
.CreateInsertElement(Constant::getNullValue(CI
->getType()),
3266 } else if (IsX86
&& (Name
.startswith("avx512.mask.vfmadd.s") ||
3267 Name
.startswith("avx512.maskz.vfmadd.s") ||
3268 Name
.startswith("avx512.mask3.vfmadd.s") ||
3269 Name
.startswith("avx512.mask3.vfmsub.s") ||
3270 Name
.startswith("avx512.mask3.vfnmsub.s"))) {
3271 bool IsMask3
= Name
[11] == '3';
3272 bool IsMaskZ
= Name
[11] == 'z';
3273 // Drop the "avx512.mask." to make it easier.
3274 Name
= Name
.drop_front(IsMask3
|| IsMaskZ
? 13 : 12);
3275 bool NegMul
= Name
[2] == 'n';
3276 bool NegAcc
= NegMul
? Name
[4] == 's' : Name
[3] == 's';
3278 Value
*A
= CI
->getArgOperand(0);
3279 Value
*B
= CI
->getArgOperand(1);
3280 Value
*C
= CI
->getArgOperand(2);
3282 if (NegMul
&& (IsMask3
|| IsMaskZ
))
3283 A
= Builder
.CreateFNeg(A
);
3284 if (NegMul
&& !(IsMask3
|| IsMaskZ
))
3285 B
= Builder
.CreateFNeg(B
);
3287 C
= Builder
.CreateFNeg(C
);
3289 A
= Builder
.CreateExtractElement(A
, (uint64_t)0);
3290 B
= Builder
.CreateExtractElement(B
, (uint64_t)0);
3291 C
= Builder
.CreateExtractElement(C
, (uint64_t)0);
3293 if (!isa
<ConstantInt
>(CI
->getArgOperand(4)) ||
3294 cast
<ConstantInt
>(CI
->getArgOperand(4))->getZExtValue() != 4) {
3295 Value
*Ops
[] = { A
, B
, C
, CI
->getArgOperand(4) };
3298 if (Name
.back() == 'd')
3299 IID
= Intrinsic::x86_avx512_vfmadd_f64
;
3301 IID
= Intrinsic::x86_avx512_vfmadd_f32
;
3302 Function
*FMA
= Intrinsic::getDeclaration(CI
->getModule(), IID
);
3303 Rep
= Builder
.CreateCall(FMA
, Ops
);
3305 Function
*FMA
= Intrinsic::getDeclaration(CI
->getModule(),
3308 Rep
= Builder
.CreateCall(FMA
, { A
, B
, C
});
3311 Value
*PassThru
= IsMaskZ
? Constant::getNullValue(Rep
->getType()) :
3314 // For Mask3 with NegAcc, we need to create a new extractelement that
3315 // avoids the negation above.
3316 if (NegAcc
&& IsMask3
)
3317 PassThru
= Builder
.CreateExtractElement(CI
->getArgOperand(2),
3320 Rep
= EmitX86ScalarSelect(Builder
, CI
->getArgOperand(3),
3322 Rep
= Builder
.CreateInsertElement(CI
->getArgOperand(IsMask3
? 2 : 0),
3324 } else if (IsX86
&& (Name
.startswith("avx512.mask.vfmadd.p") ||
3325 Name
.startswith("avx512.mask.vfnmadd.p") ||
3326 Name
.startswith("avx512.mask.vfnmsub.p") ||
3327 Name
.startswith("avx512.mask3.vfmadd.p") ||
3328 Name
.startswith("avx512.mask3.vfmsub.p") ||
3329 Name
.startswith("avx512.mask3.vfnmsub.p") ||
3330 Name
.startswith("avx512.maskz.vfmadd.p"))) {
3331 bool IsMask3
= Name
[11] == '3';
3332 bool IsMaskZ
= Name
[11] == 'z';
3333 // Drop the "avx512.mask." to make it easier.
3334 Name
= Name
.drop_front(IsMask3
|| IsMaskZ
? 13 : 12);
3335 bool NegMul
= Name
[2] == 'n';
3336 bool NegAcc
= NegMul
? Name
[4] == 's' : Name
[3] == 's';
3338 Value
*A
= CI
->getArgOperand(0);
3339 Value
*B
= CI
->getArgOperand(1);
3340 Value
*C
= CI
->getArgOperand(2);
3342 if (NegMul
&& (IsMask3
|| IsMaskZ
))
3343 A
= Builder
.CreateFNeg(A
);
3344 if (NegMul
&& !(IsMask3
|| IsMaskZ
))
3345 B
= Builder
.CreateFNeg(B
);
3347 C
= Builder
.CreateFNeg(C
);
3349 if (CI
->getNumArgOperands() == 5 &&
3350 (!isa
<ConstantInt
>(CI
->getArgOperand(4)) ||
3351 cast
<ConstantInt
>(CI
->getArgOperand(4))->getZExtValue() != 4)) {
3353 // Check the character before ".512" in string.
3354 if (Name
[Name
.size()-5] == 's')
3355 IID
= Intrinsic::x86_avx512_vfmadd_ps_512
;
3357 IID
= Intrinsic::x86_avx512_vfmadd_pd_512
;
3359 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(), IID
),
3360 { A
, B
, C
, CI
->getArgOperand(4) });
3362 Function
*FMA
= Intrinsic::getDeclaration(CI
->getModule(),
3365 Rep
= Builder
.CreateCall(FMA
, { A
, B
, C
});
3368 Value
*PassThru
= IsMaskZ
? llvm::Constant::getNullValue(CI
->getType()) :
3369 IsMask3
? CI
->getArgOperand(2) :
3370 CI
->getArgOperand(0);
3372 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
, PassThru
);
3373 } else if (IsX86
&& Name
.startswith("fma.vfmsubadd.p")) {
3374 unsigned VecWidth
= CI
->getType()->getPrimitiveSizeInBits();
3375 unsigned EltWidth
= CI
->getType()->getScalarSizeInBits();
3377 if (VecWidth
== 128 && EltWidth
== 32)
3378 IID
= Intrinsic::x86_fma_vfmaddsub_ps
;
3379 else if (VecWidth
== 256 && EltWidth
== 32)
3380 IID
= Intrinsic::x86_fma_vfmaddsub_ps_256
;
3381 else if (VecWidth
== 128 && EltWidth
== 64)
3382 IID
= Intrinsic::x86_fma_vfmaddsub_pd
;
3383 else if (VecWidth
== 256 && EltWidth
== 64)
3384 IID
= Intrinsic::x86_fma_vfmaddsub_pd_256
;
3386 llvm_unreachable("Unexpected intrinsic");
3388 Value
*Ops
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3389 CI
->getArgOperand(2) };
3390 Ops
[2] = Builder
.CreateFNeg(Ops
[2]);
3391 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(), IID
),
3393 } else if (IsX86
&& (Name
.startswith("avx512.mask.vfmaddsub.p") ||
3394 Name
.startswith("avx512.mask3.vfmaddsub.p") ||
3395 Name
.startswith("avx512.maskz.vfmaddsub.p") ||
3396 Name
.startswith("avx512.mask3.vfmsubadd.p"))) {
3397 bool IsMask3
= Name
[11] == '3';
3398 bool IsMaskZ
= Name
[11] == 'z';
3399 // Drop the "avx512.mask." to make it easier.
3400 Name
= Name
.drop_front(IsMask3
|| IsMaskZ
? 13 : 12);
3401 bool IsSubAdd
= Name
[3] == 's';
3402 if (CI
->getNumArgOperands() == 5) {
3404 // Check the character before ".512" in string.
3405 if (Name
[Name
.size()-5] == 's')
3406 IID
= Intrinsic::x86_avx512_vfmaddsub_ps_512
;
3408 IID
= Intrinsic::x86_avx512_vfmaddsub_pd_512
;
3410 Value
*Ops
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3411 CI
->getArgOperand(2), CI
->getArgOperand(4) };
3413 Ops
[2] = Builder
.CreateFNeg(Ops
[2]);
3415 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(), IID
),
3418 int NumElts
= cast
<FixedVectorType
>(CI
->getType())->getNumElements();
3420 Value
*Ops
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3421 CI
->getArgOperand(2) };
3423 Function
*FMA
= Intrinsic::getDeclaration(CI
->getModule(), Intrinsic::fma
,
3425 Value
*Odd
= Builder
.CreateCall(FMA
, Ops
);
3426 Ops
[2] = Builder
.CreateFNeg(Ops
[2]);
3427 Value
*Even
= Builder
.CreateCall(FMA
, Ops
);
3430 std::swap(Even
, Odd
);
3432 SmallVector
<int, 32> Idxs(NumElts
);
3433 for (int i
= 0; i
!= NumElts
; ++i
)
3434 Idxs
[i
] = i
+ (i
% 2) * NumElts
;
3436 Rep
= Builder
.CreateShuffleVector(Even
, Odd
, Idxs
);
3439 Value
*PassThru
= IsMaskZ
? llvm::Constant::getNullValue(CI
->getType()) :
3440 IsMask3
? CI
->getArgOperand(2) :
3441 CI
->getArgOperand(0);
3443 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
, PassThru
);
3444 } else if (IsX86
&& (Name
.startswith("avx512.mask.pternlog.") ||
3445 Name
.startswith("avx512.maskz.pternlog."))) {
3446 bool ZeroMask
= Name
[11] == 'z';
3447 unsigned VecWidth
= CI
->getType()->getPrimitiveSizeInBits();
3448 unsigned EltWidth
= CI
->getType()->getScalarSizeInBits();
3450 if (VecWidth
== 128 && EltWidth
== 32)
3451 IID
= Intrinsic::x86_avx512_pternlog_d_128
;
3452 else if (VecWidth
== 256 && EltWidth
== 32)
3453 IID
= Intrinsic::x86_avx512_pternlog_d_256
;
3454 else if (VecWidth
== 512 && EltWidth
== 32)
3455 IID
= Intrinsic::x86_avx512_pternlog_d_512
;
3456 else if (VecWidth
== 128 && EltWidth
== 64)
3457 IID
= Intrinsic::x86_avx512_pternlog_q_128
;
3458 else if (VecWidth
== 256 && EltWidth
== 64)
3459 IID
= Intrinsic::x86_avx512_pternlog_q_256
;
3460 else if (VecWidth
== 512 && EltWidth
== 64)
3461 IID
= Intrinsic::x86_avx512_pternlog_q_512
;
3463 llvm_unreachable("Unexpected intrinsic");
3465 Value
*Args
[] = { CI
->getArgOperand(0) , CI
->getArgOperand(1),
3466 CI
->getArgOperand(2), CI
->getArgOperand(3) };
3467 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(), IID
),
3469 Value
*PassThru
= ZeroMask
? ConstantAggregateZero::get(CI
->getType())
3470 : CI
->getArgOperand(0);
3471 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(4), Rep
, PassThru
);
3472 } else if (IsX86
&& (Name
.startswith("avx512.mask.vpmadd52") ||
3473 Name
.startswith("avx512.maskz.vpmadd52"))) {
3474 bool ZeroMask
= Name
[11] == 'z';
3475 bool High
= Name
[20] == 'h' || Name
[21] == 'h';
3476 unsigned VecWidth
= CI
->getType()->getPrimitiveSizeInBits();
3478 if (VecWidth
== 128 && !High
)
3479 IID
= Intrinsic::x86_avx512_vpmadd52l_uq_128
;
3480 else if (VecWidth
== 256 && !High
)
3481 IID
= Intrinsic::x86_avx512_vpmadd52l_uq_256
;
3482 else if (VecWidth
== 512 && !High
)
3483 IID
= Intrinsic::x86_avx512_vpmadd52l_uq_512
;
3484 else if (VecWidth
== 128 && High
)
3485 IID
= Intrinsic::x86_avx512_vpmadd52h_uq_128
;
3486 else if (VecWidth
== 256 && High
)
3487 IID
= Intrinsic::x86_avx512_vpmadd52h_uq_256
;
3488 else if (VecWidth
== 512 && High
)
3489 IID
= Intrinsic::x86_avx512_vpmadd52h_uq_512
;
3491 llvm_unreachable("Unexpected intrinsic");
3493 Value
*Args
[] = { CI
->getArgOperand(0) , CI
->getArgOperand(1),
3494 CI
->getArgOperand(2) };
3495 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(), IID
),
3497 Value
*PassThru
= ZeroMask
? ConstantAggregateZero::get(CI
->getType())
3498 : CI
->getArgOperand(0);
3499 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
, PassThru
);
3500 } else if (IsX86
&& (Name
.startswith("avx512.mask.vpermi2var.") ||
3501 Name
.startswith("avx512.mask.vpermt2var.") ||
3502 Name
.startswith("avx512.maskz.vpermt2var."))) {
3503 bool ZeroMask
= Name
[11] == 'z';
3504 bool IndexForm
= Name
[17] == 'i';
3505 Rep
= UpgradeX86VPERMT2Intrinsics(Builder
, *CI
, ZeroMask
, IndexForm
);
3506 } else if (IsX86
&& (Name
.startswith("avx512.mask.vpdpbusd.") ||
3507 Name
.startswith("avx512.maskz.vpdpbusd.") ||
3508 Name
.startswith("avx512.mask.vpdpbusds.") ||
3509 Name
.startswith("avx512.maskz.vpdpbusds."))) {
3510 bool ZeroMask
= Name
[11] == 'z';
3511 bool IsSaturating
= Name
[ZeroMask
? 21 : 20] == 's';
3512 unsigned VecWidth
= CI
->getType()->getPrimitiveSizeInBits();
3514 if (VecWidth
== 128 && !IsSaturating
)
3515 IID
= Intrinsic::x86_avx512_vpdpbusd_128
;
3516 else if (VecWidth
== 256 && !IsSaturating
)
3517 IID
= Intrinsic::x86_avx512_vpdpbusd_256
;
3518 else if (VecWidth
== 512 && !IsSaturating
)
3519 IID
= Intrinsic::x86_avx512_vpdpbusd_512
;
3520 else if (VecWidth
== 128 && IsSaturating
)
3521 IID
= Intrinsic::x86_avx512_vpdpbusds_128
;
3522 else if (VecWidth
== 256 && IsSaturating
)
3523 IID
= Intrinsic::x86_avx512_vpdpbusds_256
;
3524 else if (VecWidth
== 512 && IsSaturating
)
3525 IID
= Intrinsic::x86_avx512_vpdpbusds_512
;
3527 llvm_unreachable("Unexpected intrinsic");
3529 Value
*Args
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3530 CI
->getArgOperand(2) };
3531 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(), IID
),
3533 Value
*PassThru
= ZeroMask
? ConstantAggregateZero::get(CI
->getType())
3534 : CI
->getArgOperand(0);
3535 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
, PassThru
);
3536 } else if (IsX86
&& (Name
.startswith("avx512.mask.vpdpwssd.") ||
3537 Name
.startswith("avx512.maskz.vpdpwssd.") ||
3538 Name
.startswith("avx512.mask.vpdpwssds.") ||
3539 Name
.startswith("avx512.maskz.vpdpwssds."))) {
3540 bool ZeroMask
= Name
[11] == 'z';
3541 bool IsSaturating
= Name
[ZeroMask
? 21 : 20] == 's';
3542 unsigned VecWidth
= CI
->getType()->getPrimitiveSizeInBits();
3544 if (VecWidth
== 128 && !IsSaturating
)
3545 IID
= Intrinsic::x86_avx512_vpdpwssd_128
;
3546 else if (VecWidth
== 256 && !IsSaturating
)
3547 IID
= Intrinsic::x86_avx512_vpdpwssd_256
;
3548 else if (VecWidth
== 512 && !IsSaturating
)
3549 IID
= Intrinsic::x86_avx512_vpdpwssd_512
;
3550 else if (VecWidth
== 128 && IsSaturating
)
3551 IID
= Intrinsic::x86_avx512_vpdpwssds_128
;
3552 else if (VecWidth
== 256 && IsSaturating
)
3553 IID
= Intrinsic::x86_avx512_vpdpwssds_256
;
3554 else if (VecWidth
== 512 && IsSaturating
)
3555 IID
= Intrinsic::x86_avx512_vpdpwssds_512
;
3557 llvm_unreachable("Unexpected intrinsic");
3559 Value
*Args
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3560 CI
->getArgOperand(2) };
3561 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(), IID
),
3563 Value
*PassThru
= ZeroMask
? ConstantAggregateZero::get(CI
->getType())
3564 : CI
->getArgOperand(0);
3565 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
, PassThru
);
    } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
                         Name == "addcarry.u32" || Name == "addcarry.u64" ||
                         Name == "subborrow.u32" || Name == "subborrow.u64")) {
      Intrinsic::ID IID;
      if (Name[0] == 'a' && Name.back() == '2')
        IID = Intrinsic::x86_addcarry_32;
      else if (Name[0] == 'a' && Name.back() == '4')
        IID = Intrinsic::x86_addcarry_64;
      else if (Name[0] == 's' && Name.back() == '2')
        IID = Intrinsic::x86_subborrow_32;
      else if (Name[0] == 's' && Name.back() == '4')
        IID = Intrinsic::x86_subborrow_64;
      else
        llvm_unreachable("Unexpected intrinsic");

      // Make a call with 3 operands.
      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                        CI->getArgOperand(2)};
      Value *NewCall = Builder.CreateCall(
          Intrinsic::getDeclaration(CI->getModule(), IID), Args);

      // Extract the second result and store it.
      Value *Data = Builder.CreateExtractValue(NewCall, 1);
      // Cast the pointer to the right type.
      Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
                                 llvm::PointerType::getUnqual(Data->getType()));
      Builder.CreateAlignedStore(Data, Ptr, Align(1));
      // Replace the original call result with the first result of the new call.
      Value *CF = Builder.CreateExtractValue(NewCall, 0);

      CI->replaceAllUsesWith(CF);
      Rep = nullptr;
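      // For reference (illustrative sketch; the IR value names are
      // assumptions): an upgraded "addcarry.u32" call ends up as roughly
      //   %pair = call { i8, i32 } @llvm.x86.addcarry.32(i8 %c, i32 %x, i32 %y)
      //   %val  = extractvalue { i8, i32 } %pair, 1
      //   store i32 %val, i32* %out, align 1
      //   %cf   = extractvalue { i8, i32 } %pair, 0
      // with %cf replacing the original call's result.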
    } else if (IsX86 && Name.startswith("avx512.mask.") &&
               upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
      // Rep will be updated by the call in the condition.
    } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
      Value *Arg = CI->getArgOperand(0);
      Value *Neg = Builder.CreateNeg(Arg, "neg");
      Value *Cmp = Builder.CreateICmpSGE(
          Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
      Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
    } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
                          Name.startswith("atomic.load.add.f64.p"))) {
      Value *Ptr = CI->getArgOperand(0);
      Value *Val = CI->getArgOperand(1);
      Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
                                    AtomicOrdering::SequentiallyConsistent);
    } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
                          Name == "max.ui" || Name == "max.ull")) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
                       ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
                       : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
    } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
                          Name == "min.ui" || Name == "min.ull")) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
                       ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
                       : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
    } else if (IsNVVM && Name == "clz.ll") {
      // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
      Value *Arg = CI->getArgOperand(0);
      Value *Ctlz = Builder.CreateCall(
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                    {Arg->getType()}),
          {Arg, Builder.getFalse()}, "ctlz");
      Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
    } else if (IsNVVM && Name == "popc.ll") {
      // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
      Value *Arg = CI->getArgOperand(0);
      Value *Popc = Builder.CreateCall(
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                    {Arg->getType()}),
          Arg, "ctpop");
      Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
    } else if (IsNVVM && Name == "h2f") {
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(
                                   F->getParent(), Intrinsic::convert_from_fp16,
                                   {Builder.getFloatTy()}),
                               CI->getArgOperand(0), "h2f");
    } else {
      llvm_unreachable("Unknown function for CallInst upgrade.");
    }

    if (Rep)
      CI->replaceAllUsesWith(Rep);
    CI->eraseFromParent();
    return;
  }
  const auto &DefaultCase = [&NewFn, &CI]() -> void {
    // Handle generic mangling change, but nothing else.
    assert(
        (CI->getCalledFunction()->getName() != NewFn->getName()) &&
        "Unknown function for CallInst upgrade and isn't just a name change");
    CI->setCalledFunction(NewFn);
  };
  CallInst *NewCall = nullptr;
  switch (NewFn->getIntrinsicID()) {
  default: {
    DefaultCase();
    return;
  }
3675 case Intrinsic::arm_neon_vld1
:
3676 case Intrinsic::arm_neon_vld2
:
3677 case Intrinsic::arm_neon_vld3
:
3678 case Intrinsic::arm_neon_vld4
:
3679 case Intrinsic::arm_neon_vld2lane
:
3680 case Intrinsic::arm_neon_vld3lane
:
3681 case Intrinsic::arm_neon_vld4lane
:
3682 case Intrinsic::arm_neon_vst1
:
3683 case Intrinsic::arm_neon_vst2
:
3684 case Intrinsic::arm_neon_vst3
:
3685 case Intrinsic::arm_neon_vst4
:
3686 case Intrinsic::arm_neon_vst2lane
:
3687 case Intrinsic::arm_neon_vst3lane
:
3688 case Intrinsic::arm_neon_vst4lane
: {
3689 SmallVector
<Value
*, 4> Args(CI
->arg_operands().begin(),
3690 CI
->arg_operands().end());
3691 NewCall
= Builder
.CreateCall(NewFn
, Args
);
3695 case Intrinsic::arm_neon_bfdot
:
3696 case Intrinsic::arm_neon_bfmmla
:
3697 case Intrinsic::arm_neon_bfmlalb
:
3698 case Intrinsic::arm_neon_bfmlalt
:
3699 case Intrinsic::aarch64_neon_bfdot
:
3700 case Intrinsic::aarch64_neon_bfmmla
:
3701 case Intrinsic::aarch64_neon_bfmlalb
:
3702 case Intrinsic::aarch64_neon_bfmlalt
: {
3703 SmallVector
<Value
*, 3> Args
;
3704 assert(CI
->getNumArgOperands() == 3 &&
3705 "Mismatch between function args and call args");
3706 size_t OperandWidth
=
3707 CI
->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
3708 assert((OperandWidth
== 64 || OperandWidth
== 128) &&
3709 "Unexpected operand width");
3710 Type
*NewTy
= FixedVectorType::get(Type::getBFloatTy(C
), OperandWidth
/ 16);
3711 auto Iter
= CI
->arg_operands().begin();
3712 Args
.push_back(*Iter
++);
3713 Args
.push_back(Builder
.CreateBitCast(*Iter
++, NewTy
));
3714 Args
.push_back(Builder
.CreateBitCast(*Iter
++, NewTy
));
3715 NewCall
= Builder
.CreateCall(NewFn
, Args
);
3719 case Intrinsic::bitreverse
:
3720 NewCall
= Builder
.CreateCall(NewFn
, {CI
->getArgOperand(0)});
3723 case Intrinsic::ctlz
:
3724 case Intrinsic::cttz
:
3725 assert(CI
->getNumArgOperands() == 1 &&
3726 "Mismatch between function args and call args");
3728 Builder
.CreateCall(NewFn
, {CI
->getArgOperand(0), Builder
.getFalse()});
3731 case Intrinsic::objectsize
: {
3732 Value
*NullIsUnknownSize
= CI
->getNumArgOperands() == 2
3733 ? Builder
.getFalse()
3734 : CI
->getArgOperand(2);
3736 CI
->getNumArgOperands() < 4 ? Builder
.getFalse() : CI
->getArgOperand(3);
3737 NewCall
= Builder
.CreateCall(
3738 NewFn
, {CI
->getArgOperand(0), CI
->getArgOperand(1), NullIsUnknownSize
, Dynamic
});
3742 case Intrinsic::ctpop
:
3743 NewCall
= Builder
.CreateCall(NewFn
, {CI
->getArgOperand(0)});
3746 case Intrinsic::convert_from_fp16
:
3747 NewCall
= Builder
.CreateCall(NewFn
, {CI
->getArgOperand(0)});
3750 case Intrinsic::dbg_value
:
3751 // Upgrade from the old version that had an extra offset argument.
3752 assert(CI
->getNumArgOperands() == 4);
3753 // Drop nonzero offsets instead of attempting to upgrade them.
3754 if (auto *Offset
= dyn_cast_or_null
<Constant
>(CI
->getArgOperand(1)))
3755 if (Offset
->isZeroValue()) {
3756 NewCall
= Builder
.CreateCall(
3758 {CI
->getArgOperand(0), CI
->getArgOperand(2), CI
->getArgOperand(3)});
3761 CI
->eraseFromParent();
3764 case Intrinsic::ptr_annotation
:
3765 // Upgrade from versions that lacked the annotation attribute argument.
3766 assert(CI
->getNumArgOperands() == 4 &&
3767 "Before LLVM 12.0 this intrinsic took four arguments");
3768 // Create a new call with an added null annotation attribute argument.
3769 NewCall
= Builder
.CreateCall(
3771 {CI
->getArgOperand(0), CI
->getArgOperand(1), CI
->getArgOperand(2),
3772 CI
->getArgOperand(3), Constant::getNullValue(Builder
.getInt8PtrTy())});
3773 NewCall
->takeName(CI
);
3774 CI
->replaceAllUsesWith(NewCall
);
3775 CI
->eraseFromParent();
3778 case Intrinsic::var_annotation
:
3779 // Upgrade from versions that lacked the annotation attribute argument.
3780 assert(CI
->getNumArgOperands() == 4 &&
3781 "Before LLVM 12.0 this intrinsic took four arguments");
3782 // Create a new call with an added null annotation attribute argument.
3783 NewCall
= Builder
.CreateCall(
3785 {CI
->getArgOperand(0), CI
->getArgOperand(1), CI
->getArgOperand(2),
3786 CI
->getArgOperand(3), Constant::getNullValue(Builder
.getInt8PtrTy())});
3787 CI
->eraseFromParent();
3790 case Intrinsic::x86_xop_vfrcz_ss
:
3791 case Intrinsic::x86_xop_vfrcz_sd
:
3792 NewCall
= Builder
.CreateCall(NewFn
, {CI
->getArgOperand(1)});
3795 case Intrinsic::x86_xop_vpermil2pd
:
3796 case Intrinsic::x86_xop_vpermil2ps
:
3797 case Intrinsic::x86_xop_vpermil2pd_256
:
3798 case Intrinsic::x86_xop_vpermil2ps_256
: {
3799 SmallVector
<Value
*, 4> Args(CI
->arg_operands().begin(),
3800 CI
->arg_operands().end());
3801 VectorType
*FltIdxTy
= cast
<VectorType
>(Args
[2]->getType());
3802 VectorType
*IntIdxTy
= VectorType::getInteger(FltIdxTy
);
3803 Args
[2] = Builder
.CreateBitCast(Args
[2], IntIdxTy
);
3804 NewCall
= Builder
.CreateCall(NewFn
, Args
);
3808 case Intrinsic::x86_sse41_ptestc
:
3809 case Intrinsic::x86_sse41_ptestz
:
3810 case Intrinsic::x86_sse41_ptestnzc
: {
3811 // The arguments for these intrinsics used to be v4f32, and changed
3812 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3813 // So, the only thing required is a bitcast for both arguments.
3814 // First, check the arguments have the old type.
3815 Value
*Arg0
= CI
->getArgOperand(0);
3816 if (Arg0
->getType() != FixedVectorType::get(Type::getFloatTy(C
), 4))
3819 // Old intrinsic, add bitcasts
3820 Value
*Arg1
= CI
->getArgOperand(1);
3822 auto *NewVecTy
= FixedVectorType::get(Type::getInt64Ty(C
), 2);
3824 Value
*BC0
= Builder
.CreateBitCast(Arg0
, NewVecTy
, "cast");
3825 Value
*BC1
= Builder
.CreateBitCast(Arg1
, NewVecTy
, "cast");
3827 NewCall
= Builder
.CreateCall(NewFn
, {BC0
, BC1
});
3831 case Intrinsic::x86_rdtscp
: {
3832 // This used to take 1 arguments. If we have no arguments, it is already
3834 if (CI
->getNumOperands() == 0)
3837 NewCall
= Builder
.CreateCall(NewFn
);
3838 // Extract the second result and store it.
3839 Value
*Data
= Builder
.CreateExtractValue(NewCall
, 1);
3840 // Cast the pointer to the right type.
3841 Value
*Ptr
= Builder
.CreateBitCast(CI
->getArgOperand(0),
3842 llvm::PointerType::getUnqual(Data
->getType()));
3843 Builder
.CreateAlignedStore(Data
, Ptr
, Align(1));
3844 // Replace the original call result with the first result of the new call.
3845 Value
*TSC
= Builder
.CreateExtractValue(NewCall
, 0);
3847 NewCall
->takeName(CI
);
3848 CI
->replaceAllUsesWith(TSC
);
3849 CI
->eraseFromParent();
3853 case Intrinsic::x86_sse41_insertps
:
3854 case Intrinsic::x86_sse41_dppd
:
3855 case Intrinsic::x86_sse41_dpps
:
3856 case Intrinsic::x86_sse41_mpsadbw
:
3857 case Intrinsic::x86_avx_dp_ps_256
:
3858 case Intrinsic::x86_avx2_mpsadbw
: {
3859 // Need to truncate the last argument from i32 to i8 -- this argument models
3860 // an inherently 8-bit immediate operand to these x86 instructions.
3861 SmallVector
<Value
*, 4> Args(CI
->arg_operands().begin(),
3862 CI
->arg_operands().end());
3864 // Replace the last argument with a trunc.
3865 Args
.back() = Builder
.CreateTrunc(Args
.back(), Type::getInt8Ty(C
), "trunc");
3866 NewCall
= Builder
.CreateCall(NewFn
, Args
);
3870 case Intrinsic::x86_avx512_mask_cmp_pd_128
:
3871 case Intrinsic::x86_avx512_mask_cmp_pd_256
:
3872 case Intrinsic::x86_avx512_mask_cmp_pd_512
:
3873 case Intrinsic::x86_avx512_mask_cmp_ps_128
:
3874 case Intrinsic::x86_avx512_mask_cmp_ps_256
:
3875 case Intrinsic::x86_avx512_mask_cmp_ps_512
: {
3876 SmallVector
<Value
*, 4> Args(CI
->arg_operands().begin(),
3877 CI
->arg_operands().end());
3879 cast
<FixedVectorType
>(Args
[0]->getType())->getNumElements();
3880 Args
[3] = getX86MaskVec(Builder
, Args
[3], NumElts
);
3882 NewCall
= Builder
.CreateCall(NewFn
, Args
);
3883 Value
*Res
= ApplyX86MaskOn1BitsVec(Builder
, NewCall
, nullptr);
3885 NewCall
->takeName(CI
);
3886 CI
->replaceAllUsesWith(Res
);
3887 CI
->eraseFromParent();
3891 case Intrinsic::thread_pointer
: {
3892 NewCall
= Builder
.CreateCall(NewFn
, {});
3896 case Intrinsic::invariant_start
:
3897 case Intrinsic::invariant_end
: {
3898 SmallVector
<Value
*, 4> Args(CI
->arg_operands().begin(),
3899 CI
->arg_operands().end());
3900 NewCall
= Builder
.CreateCall(NewFn
, Args
);
3903 case Intrinsic::masked_load
:
3904 case Intrinsic::masked_store
:
3905 case Intrinsic::masked_gather
:
3906 case Intrinsic::masked_scatter
: {
3907 SmallVector
<Value
*, 4> Args(CI
->arg_operands().begin(),
3908 CI
->arg_operands().end());
3909 NewCall
= Builder
.CreateCall(NewFn
, Args
);
3910 NewCall
->copyMetadata(*CI
);
3914 case Intrinsic::memcpy
:
3915 case Intrinsic::memmove
:
3916 case Intrinsic::memset
: {
3917 // We have to make sure that the call signature is what we're expecting.
3918 // We only want to change the old signatures by removing the alignment arg:
3919 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
3920 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
3921 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3922 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
3923 // Note: i8*'s in the above can be any pointer type
3924 if (CI
->getNumArgOperands() != 5) {
3928 // Remove alignment argument (3), and add alignment attributes to the
3929 // dest/src pointers.
3930 Value
*Args
[4] = {CI
->getArgOperand(0), CI
->getArgOperand(1),
3931 CI
->getArgOperand(2), CI
->getArgOperand(4)};
3932 NewCall
= Builder
.CreateCall(NewFn
, Args
);
3933 auto *MemCI
= cast
<MemIntrinsic
>(NewCall
);
3934 // All mem intrinsics support dest alignment.
3935 const ConstantInt
*Align
= cast
<ConstantInt
>(CI
->getArgOperand(3));
3936 MemCI
->setDestAlignment(Align
->getMaybeAlignValue());
3937 // Memcpy/Memmove also support source alignment.
3938 if (auto *MTI
= dyn_cast
<MemTransferInst
>(MemCI
))
3939 MTI
->setSourceAlignment(Align
->getMaybeAlignValue());
3943 assert(NewCall
&& "Should have either set this variable or returned through "
3944 "the default case");
3945 NewCall
->takeName(CI
);
3946 CI
->replaceAllUsesWith(NewCall
);
3947 CI
->eraseFromParent();
void llvm::UpgradeCallsToIntrinsic(Function *F) {
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");

  // Check if this function should be upgraded and get the replacement function
  // if there is one.
  Function *NewFn;
  if (UpgradeIntrinsicFunction(F, NewFn)) {
    // Replace all users of the old function with the new function or new
    // instructions. This is not a range loop because the call is deleted.
    for (User *U : make_early_inc_range(F->users()))
      if (CallInst *CI = dyn_cast<CallInst>(U))
        UpgradeIntrinsicCall(CI, NewFn);

    // Remove the old function, which is no longer used, from the module.
    F->eraseFromParent();
  }
}
MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
  // Check if the tag uses struct-path aware TBAA format.
  if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
    return &MD;

  auto &Context = MD.getContext();
  if (MD.getNumOperands() == 3) {
    Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
    MDNode *ScalarType = MDNode::get(Context, Elts);
    // Create a MDNode <ScalarType, ScalarType, offset 0, const>
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(
                             Constant::getNullValue(Type::getInt64Ty(Context))),
                         MD.getOperand(2)};
    return MDNode::get(Context, Elts2);
  }

  // Create a MDNode <MD, MD, offset 0>
  Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
                                    Type::getInt64Ty(Context)))};
  return MDNode::get(Context, Elts);
}
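// Example of the rewrite above (added illustration; the node contents are
// assumed): an old two-operand scalar tag !2 = !{!"int", !1} is wrapped as
// !{!2, !2, i64 0}, while a three-operand tag additionally carries its old
// third operand over as the trailing "is constant" flag.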
Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
                                      Instruction *&Temp) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Temp = nullptr;
  Type *SrcTy = V->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = V->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);
    Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);

    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
  }

  return nullptr;
}
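// Illustrative case (added comment; the types are an assumption): a bitcast of
// %p from i8 addrspace(1)* to i8* cannot be expressed as a single bitcast, so
// it is split into
//   %tmp = ptrtoint i8 addrspace(1)* %p to i64
//   %res = inttoptr i64 %tmp to i8*
// because a bitcast may not change a pointer's address space.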
4012 Value
*llvm::UpgradeBitCastExpr(unsigned Opc
, Constant
*C
, Type
*DestTy
) {
4013 if (Opc
!= Instruction::BitCast
)
4016 Type
*SrcTy
= C
->getType();
4017 if (SrcTy
->isPtrOrPtrVectorTy() && DestTy
->isPtrOrPtrVectorTy() &&
4018 SrcTy
->getPointerAddressSpace() != DestTy
->getPointerAddressSpace()) {
4019 LLVMContext
&Context
= C
->getContext();
4021 // We have no information about target data layout, so we assume that
4022 // the maximum pointer size is 64bit.
4023 Type
*MidTy
= Type::getInt64Ty(Context
);
4025 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C
, MidTy
),
/// Check the debug info version number; if it is outdated, drop the debug
/// info. Return true if the module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  unsigned Version = getDebugMetadataVersionFromModule(M);
  if (Version == DEBUG_METADATA_VERSION) {
    bool BrokenDebugInfo = false;
    if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
      report_fatal_error("Broken module found, compilation aborted!");
    if (!BrokenDebugInfo)
      // Everything is ok.
      return false;
    else {
      // Diagnose malformed debug info.
      DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
      M.getContext().diagnose(Diag);
    }
  }
  bool Modified = StripDebugInfo(M);
  if (Modified && Version != DEBUG_METADATA_VERSION) {
    // Diagnose a version mismatch.
    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DiagVersion);
  }
  return Modified;
}
4058 /// This checks for objc retain release marker which should be upgraded. It
4059 /// returns true if module is modified.
4060 static bool UpgradeRetainReleaseMarker(Module
&M
) {
4061 bool Changed
= false;
4062 const char *MarkerKey
= "clang.arc.retainAutoreleasedReturnValueMarker";
4063 NamedMDNode
*ModRetainReleaseMarker
= M
.getNamedMetadata(MarkerKey
);
4064 if (ModRetainReleaseMarker
) {
4065 MDNode
*Op
= ModRetainReleaseMarker
->getOperand(0);
4067 MDString
*ID
= dyn_cast_or_null
<MDString
>(Op
->getOperand(0));
4069 SmallVector
<StringRef
, 4> ValueComp
;
4070 ID
->getString().split(ValueComp
, "#");
4071 if (ValueComp
.size() == 2) {
4072 std::string NewValue
= ValueComp
[0].str() + ";" + ValueComp
[1].str();
4073 ID
= MDString::get(M
.getContext(), NewValue
);
4075 M
.addModuleFlag(Module::Error
, MarkerKey
, ID
);
4076 M
.eraseNamedMetadata(ModRetainReleaseMarker
);
4084 void llvm::UpgradeARCRuntime(Module
&M
) {
4085 // This lambda converts normal function calls to ARC runtime functions to
4087 auto UpgradeToIntrinsic
= [&](const char *OldFunc
,
4088 llvm::Intrinsic::ID IntrinsicFunc
) {
4089 Function
*Fn
= M
.getFunction(OldFunc
);
4094 Function
*NewFn
= llvm::Intrinsic::getDeclaration(&M
, IntrinsicFunc
);
4096 for (User
*U
: make_early_inc_range(Fn
->users())) {
4097 CallInst
*CI
= dyn_cast
<CallInst
>(U
);
4098 if (!CI
|| CI
->getCalledFunction() != Fn
)
4101 IRBuilder
<> Builder(CI
->getParent(), CI
->getIterator());
4102 FunctionType
*NewFuncTy
= NewFn
->getFunctionType();
4103 SmallVector
<Value
*, 2> Args
;
4105 // Don't upgrade the intrinsic if it's not valid to bitcast the return
4106 // value to the return type of the old function.
4107 if (NewFuncTy
->getReturnType() != CI
->getType() &&
4108 !CastInst::castIsValid(Instruction::BitCast
, CI
,
4109 NewFuncTy
->getReturnType()))
4112 bool InvalidCast
= false;
4114 for (unsigned I
= 0, E
= CI
->getNumArgOperands(); I
!= E
; ++I
) {
4115 Value
*Arg
= CI
->getArgOperand(I
);
4117 // Bitcast argument to the parameter type of the new function if it's
4118 // not a variadic argument.
4119 if (I
< NewFuncTy
->getNumParams()) {
4120 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
4121 // to the parameter type of the new function.
4122 if (!CastInst::castIsValid(Instruction::BitCast
, Arg
,
4123 NewFuncTy
->getParamType(I
))) {
4127 Arg
= Builder
.CreateBitCast(Arg
, NewFuncTy
->getParamType(I
));
4129 Args
.push_back(Arg
);
4135 // Create a call instruction that calls the new function.
4136 CallInst
*NewCall
= Builder
.CreateCall(NewFuncTy
, NewFn
, Args
);
4137 NewCall
->setTailCallKind(cast
<CallInst
>(CI
)->getTailCallKind());
4138 NewCall
->takeName(CI
);
4140 // Bitcast the return value back to the type of the old call.
4141 Value
*NewRetVal
= Builder
.CreateBitCast(NewCall
, CI
->getType());
4143 if (!CI
->use_empty())
4144 CI
->replaceAllUsesWith(NewRetVal
);
4145 CI
->eraseFromParent();
4148 if (Fn
->use_empty())
4149 Fn
->eraseFromParent();
4152 // Unconditionally convert a call to "clang.arc.use" to a call to
4153 // "llvm.objc.clang.arc.use".
4154 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use
);
4156 // Upgrade the retain release marker. If there is no need to upgrade
4157 // the marker, that means either the module is already new enough to contain
4158 // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
4159 if (!UpgradeRetainReleaseMarker(M
))
4162 std::pair
<const char *, llvm::Intrinsic::ID
> RuntimeFuncs
[] = {
4163 {"objc_autorelease", llvm::Intrinsic::objc_autorelease
},
4164 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop
},
4165 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush
},
4166 {"objc_autoreleaseReturnValue",
4167 llvm::Intrinsic::objc_autoreleaseReturnValue
},
4168 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak
},
4169 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak
},
4170 {"objc_initWeak", llvm::Intrinsic::objc_initWeak
},
4171 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak
},
4172 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained
},
4173 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak
},
4174 {"objc_release", llvm::Intrinsic::objc_release
},
4175 {"objc_retain", llvm::Intrinsic::objc_retain
},
4176 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease
},
4177 {"objc_retainAutoreleaseReturnValue",
4178 llvm::Intrinsic::objc_retainAutoreleaseReturnValue
},
4179 {"objc_retainAutoreleasedReturnValue",
4180 llvm::Intrinsic::objc_retainAutoreleasedReturnValue
},
4181 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock
},
4182 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong
},
4183 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak
},
4184 {"objc_unsafeClaimAutoreleasedReturnValue",
4185 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue
},
4186 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject
},
4187 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject
},
4188 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer
},
4189 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease
},
4190 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter
},
4191 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit
},
4192 {"objc_arc_annotation_topdown_bbstart",
4193 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart
},
4194 {"objc_arc_annotation_topdown_bbend",
4195 llvm::Intrinsic::objc_arc_annotation_topdown_bbend
},
4196 {"objc_arc_annotation_bottomup_bbstart",
4197 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart
},
4198 {"objc_arc_annotation_bottomup_bbend",
4199 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend
}};
4201 for (auto &I
: RuntimeFuncs
)
4202 UpgradeToIntrinsic(I
.first
, I
.second
);
bool llvm::UpgradeModuleFlags(Module &M) {
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  bool HasSwiftVersionFlag = false;
  uint8_t SwiftMajorVersion, SwiftMinorVersion;
  uint32_t SwiftABIVersion;
  auto Int8Ty = Type::getInt8Ty(M.getContext());
  auto Int32Ty = Type::getInt32Ty(M.getContext());

  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(I);
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
    // Upgrade the PIC and PIE module flags. The behavior for these two flags
    // used to be Error and is now Max.
    if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
        if (Behavior->getLimitedValue() == Module::Error) {
          Type *Int32Ty = Type::getInt32Ty(M.getContext());
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
              MDString::get(M.getContext(), ID->getString()),
              Op->getOperand(2)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
    // Upgrade the Objective-C Image Info Section. Remove the whitespace in
    // the section name so that llvm-lto does not complain about mismatching
    // module flags that are functionally the same.
    if (ID->getString() == "Objective-C Image Info Section") {
      if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
        SmallVector<StringRef, 4> ValueComp;
        Value->getString().split(ValueComp, " ");
        if (ValueComp.size() != 1) {
          std::string NewValue;
          for (auto &S : ValueComp)
            NewValue += S.str();
          Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
                              MDString::get(M.getContext(), NewValue)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }

    // The IR upgrader turns the i32-typed "Objective-C Garbage Collection"
    // flag into an i8 value. If the higher bits are set, it also adds new
    // module flags carrying the Swift version info.
    if (ID->getString() == "Objective-C Garbage Collection") {
      auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
      if (Md) {
        assert(Md->getValue() && "Expected non-empty metadata");
        auto Type = Md->getValue()->getType();
        if (Type == Int8Ty)
          continue;
        unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
        if ((Val & 0xff) != Val) {
          HasSwiftVersionFlag = true;
          SwiftABIVersion = (Val & 0xff00) >> 8;
          SwiftMajorVersion = (Val & 0xff000000) >> 24;
          SwiftMinorVersion = (Val & 0xff0000) >> 16;
        }
        Metadata *Ops[3] = {
            ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Error)),
            Op->getOperand(1),
            ConstantAsMetadata::get(ConstantInt::get(Int8Ty, Val & 0xff))};
        ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
        Changed = true;
      }
    }
  }

  // "Objective-C Class Properties" was added for Objective-C recently. We
  // upgrade ObjC bitcode to contain an "Objective-C Class Properties" module
  // flag of value 0, so that we can correctly downgrade this flag when trying
  // to link an ObjC bitcode without this module flag against an ObjC bitcode
  // that has it.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
                    (uint32_t)0);
    Changed = true;
  }

  if (HasSwiftVersionFlag) {
    M.addModuleFlag(Module::Error, "Swift ABI Version",
                    SwiftABIVersion);
    M.addModuleFlag(Module::Error, "Swift Major Version",
                    ConstantInt::get(Int8Ty, SwiftMajorVersion));
    M.addModuleFlag(Module::Error, "Swift Minor Version",
                    ConstantInt::get(Int8Ty, SwiftMinorVersion));
    Changed = true;
  }

  return Changed;
}
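
// Illustrative note (hypothetical values, not from the upstream source): an
// old-style module flag such as
//   !{i32 1, !"Objective-C Garbage Collection", i32 83953472}  ; 0x05010740
// is narrowed to its low byte,
//   !{i32 1, !"Objective-C Garbage Collection", i8 64}         ; 0x40
// and, because the upper bits were set, the Swift info is split out into the
// "Swift ABI Version" = 7, "Swift Major Version" = 5 and
// "Swift Minor Version" = 1 module flags added above.
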
void llvm::UpgradeSectionAttributes(Module &M) {
  auto TrimSpaces = [](StringRef Section) -> std::string {
    SmallVector<StringRef, 5> Components;
    Section.split(Components, ',');

    SmallString<32> Buffer;
    raw_svector_ostream OS(Buffer);

    for (auto Component : Components)
      OS << ',' << Component.trim();

    return std::string(OS.str().substr(1));
  };

  for (auto &GV : M.globals()) {
    if (!GV.hasSection())
      continue;

    StringRef Section = GV.getSection();

    if (!Section.startswith("__DATA, __objc_catlist"))
      continue;

    // Rewrite the section attribute to drop the embedded whitespace, e.g.:
    //   __DATA, __objc_catlist, regular, no_dead_strip
    // becomes
    //   __DATA,__objc_catlist,regular,no_dead_strip
    GV.setSection(TrimSpaces(Section));
  }
}
// Prior to LLVM 10.0, the strictfp attribute could be used on individual
// callsites within a function that did not also have the strictfp attribute.
// Since 10.0, if strict FP semantics are needed within a function, the
// function must have the strictfp attribute and all calls within the function
// must also have the strictfp attribute. This latter restriction is
// necessary to prevent unwanted libcall simplification when a function is
// being cloned (such as for inlining).
//
// The "dangling" strictfp attribute usage was only used to prevent constant
// folding and other libcall simplification. The nobuiltin attribute on the
// callsite has the same effect.
struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
  StrictFPUpgradeVisitor() {}

  void visitCallBase(CallBase &Call) {
    if (!Call.isStrictFP())
      return;
    if (isa<ConstrainedFPIntrinsic>(&Call))
      return;
    // If we get here, the caller doesn't have the strictfp attribute
    // but this callsite does. Replace the strictfp attribute with nobuiltin.
    Call.removeFnAttr(Attribute::StrictFP);
    Call.addFnAttr(Attribute::NoBuiltin);
  }
};
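
// Illustrative sketch (hypothetical IR, not from the upstream source): inside
// a function that does not itself carry the strictfp attribute, a dangling
// callsite attribute such as
//   %r = call double @sin(double %x) #0    ; #0 = { strictfp }
// is rewritten by the visitor above so the callsite carries nobuiltin instead
// of strictfp, which still blocks constant folding and libcall simplification.
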
void llvm::UpgradeFunctionAttributes(Function &F) {
  // If a function definition doesn't have the strictfp attribute,
  // convert any callsite strictfp attributes to nobuiltin.
  if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
    StrictFPUpgradeVisitor SFPV;
    SFPV.visit(F);
  }

  if (F.getCallingConv() == CallingConv::X86_INTR &&
      !F.arg_empty() && !F.hasParamAttribute(0, Attribute::ByVal)) {
    Type *ByValTy = cast<PointerType>(F.getArg(0)->getType())->getElementType();
    Attribute NewAttr = Attribute::getWithByValType(F.getContext(), ByValTy);
    F.addParamAttr(0, NewAttr);
  }

  // Remove all incompatible attributes from the function.
  F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
  for (auto &Arg : F.args())
    Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
}
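
// Illustrative note (hypothetical signature, not from the upstream source):
// for an x86 interrupt handler declared as
//   define x86_intrcc void @handler(%frame* %f)
// the first parameter gains an explicit byval(%frame) attribute above,
// recovering the pointee type that older bitcode left implicit.
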
static bool isOldLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return false;
  if (T->getNumOperands() < 1)
    return false;
  auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!S)
    return false;
  return S->getString().startswith("llvm.vectorizer.");
}
static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
  StringRef OldPrefix = "llvm.vectorizer.";
  assert(OldTag.startswith(OldPrefix) && "Expected old prefix");

  if (OldTag == "llvm.vectorizer.unroll")
    return MDString::get(C, "llvm.loop.interleave.count");

  return MDString::get(
      C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
             .str());
}
static Metadata *upgradeLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return MD;
  if (T->getNumOperands() < 1)
    return MD;
  auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!OldTag)
    return MD;
  if (!OldTag->getString().startswith("llvm.vectorizer."))
    return MD;

  // This has an old tag. Upgrade it.
  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
  for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
    Ops.push_back(T->getOperand(I));

  return MDTuple::get(T->getContext(), Ops);
}
MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
  auto *T = dyn_cast<MDTuple>(&N);
  if (!T)
    return &N;

  if (none_of(T->operands(), isOldLoopArgument))
    return &N;

  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  for (Metadata *MD : T->operands())
    Ops.push_back(upgradeLoopArgument(MD));

  return MDTuple::get(T->getContext(), Ops);
}
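
// Illustrative sketch (hypothetical metadata, not from the upstream source):
// an old-style loop attachment such as
//   !0 = !{!0, !1, !2}
//   !1 = !{!"llvm.vectorizer.width", i32 4}
//   !2 = !{!"llvm.vectorizer.unroll", i32 2}
// is rewritten so the arguments use the current tags:
//   !1 = !{!"llvm.loop.vectorize.width", i32 4}
//   !2 = !{!"llvm.loop.interleave.count", i32 2}
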
std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
  Triple T(TT);
  // For AMDGPU we upgrade older DataLayouts to include the default globals
  // address space of 1.
  if (T.isAMDGPU() && !DL.contains("-G") && !DL.startswith("G")) {
    return DL.empty() ? std::string("G1") : (DL + "-G1").str();
  }

  std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
  // If X86, and the datalayout matches the expected format, add pointer size
  // address spaces to the datalayout.
  if (!T.isX86() || DL.contains(AddrSpaces))
    return std::string(DL);

  SmallVector<StringRef, 4> Groups;
  Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
  if (!R.match(DL, &Groups))
    return std::string(DL);

  return (Groups[1] + AddrSpaces + Groups[3]).str();
}
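
// Illustrative examples (hypothetical strings, not from the upstream source):
// on an x86 target a layout such as
//   "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
// would be upgraded to
//   "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
// while on AMDGPU a layout without a "-G" component simply gains "-G1".
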
void llvm::UpgradeAttributes(AttrBuilder &B) {
  StringRef FramePointer;
  if (B.contains("no-frame-pointer-elim")) {
    // The value can be "true" or "false".
    for (const auto &I : B.td_attrs())
      if (I.first == "no-frame-pointer-elim")
        FramePointer = I.second == "true" ? "all" : "none";
    B.removeAttribute("no-frame-pointer-elim");
  }
  if (B.contains("no-frame-pointer-elim-non-leaf")) {
    // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
    if (FramePointer != "all")
      FramePointer = "non-leaf";
    B.removeAttribute("no-frame-pointer-elim-non-leaf");
  }
  if (!FramePointer.empty())
    B.addAttribute("frame-pointer", FramePointer);

  if (B.contains("null-pointer-is-valid")) {
    // The value can be "true" or "false".
    bool NullPointerIsValid = false;
    for (const auto &I : B.td_attrs())
      if (I.first == "null-pointer-is-valid")
        NullPointerIsValid = I.second == "true";
    B.removeAttribute("null-pointer-is-valid");
    if (NullPointerIsValid)
      B.addAttribute(Attribute::NullPointerIsValid);
  }
}
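
// Illustrative note (hypothetical attribute strings, not from the upstream
// source): the legacy string attributes
//   "no-frame-pointer-elim"="true" "null-pointer-is-valid"="true"
// are replaced by
//   "frame-pointer"="all" null_pointer_is_valid
// so old bitcode keeps its frame-pointer and null-pointer semantics.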