1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/BinaryFormat/Dwarf.h"
19 #include "llvm/IR/AttributeMask.h"
20 #include "llvm/IR/Constants.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DebugInfoMetadata.h"
23 #include "llvm/IR/DiagnosticInfo.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/InstVisitor.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/IntrinsicInst.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include "llvm/IR/IntrinsicsAArch64.h"
31 #include "llvm/IR/IntrinsicsARM.h"
32 #include "llvm/IR/IntrinsicsNVPTX.h"
33 #include "llvm/IR/IntrinsicsRISCV.h"
34 #include "llvm/IR/IntrinsicsWebAssembly.h"
35 #include "llvm/IR/IntrinsicsX86.h"
36 #include "llvm/IR/LLVMContext.h"
37 #include "llvm/IR/Metadata.h"
38 #include "llvm/IR/Module.h"
39 #include "llvm/IR/Verifier.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/Regex.h"
43 #include "llvm/TargetParser/Triple.h"
49 DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
50 cl::desc("Disable autoupgrade of debug info"));
52 static void rename(GlobalValue
*GV
) { GV
->setName(GV
->getName() + ".old"); }
54 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
55 // changed their type from v4f32 to v2i64.
56 static bool UpgradePTESTIntrinsic(Function
* F
, Intrinsic::ID IID
,
58 // Check whether this is an old version of the function, which received
60 Type
*Arg0Type
= F
->getFunctionType()->getParamType(0);
61 if (Arg0Type
!= FixedVectorType::get(Type::getFloatTy(F
->getContext()), 4))
64 // Yes, it's old, replace it with new version.
66 NewFn
= Intrinsic::getDeclaration(F
->getParent(), IID
);
70 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
71 // arguments have changed their type from i32 to i8.
72 static bool UpgradeX86IntrinsicsWith8BitMask(Function
*F
, Intrinsic::ID IID
,
74 // Check that the last argument is an i32.
75 Type
*LastArgType
= F
->getFunctionType()->getParamType(
76 F
->getFunctionType()->getNumParams() - 1);
77 if (!LastArgType
->isIntegerTy(32))
80 // Move this function aside and map down.
82 NewFn
= Intrinsic::getDeclaration(F
->getParent(), IID
);
86 // Upgrade the declaration of fp compare intrinsics that change return type
87 // from scalar to vXi1 mask.
88 static bool UpgradeX86MaskedFPCompare(Function
*F
, Intrinsic::ID IID
,
90 // Check if the return type is a vector.
91 if (F
->getReturnType()->isVectorTy())
95 NewFn
= Intrinsic::getDeclaration(F
->getParent(), IID
);
99 static bool UpgradeX86BF16Intrinsic(Function
*F
, Intrinsic::ID IID
,
101 if (F
->getReturnType()->getScalarType()->isBFloatTy())
105 NewFn
= Intrinsic::getDeclaration(F
->getParent(), IID
);
109 static bool UpgradeX86BF16DPIntrinsic(Function
*F
, Intrinsic::ID IID
,
111 if (F
->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
115 NewFn
= Intrinsic::getDeclaration(F
->getParent(), IID
);
119 static bool ShouldUpgradeX86Intrinsic(Function
*F
, StringRef Name
) {
120 // All of the intrinsics matches below should be marked with which llvm
121 // version started autoupgrading them. At some point in the future we would
122 // like to use this information to remove upgrade code for some older
123 // intrinsics. It is currently undecided how we will determine that future
125 if (Name
== "addcarryx.u32" || // Added in 8.0
126 Name
== "addcarryx.u64" || // Added in 8.0
127 Name
== "addcarry.u32" || // Added in 8.0
128 Name
== "addcarry.u64" || // Added in 8.0
129 Name
== "subborrow.u32" || // Added in 8.0
130 Name
== "subborrow.u64" || // Added in 8.0
131 Name
.startswith("sse2.padds.") || // Added in 8.0
132 Name
.startswith("sse2.psubs.") || // Added in 8.0
133 Name
.startswith("sse2.paddus.") || // Added in 8.0
134 Name
.startswith("sse2.psubus.") || // Added in 8.0
135 Name
.startswith("avx2.padds.") || // Added in 8.0
136 Name
.startswith("avx2.psubs.") || // Added in 8.0
137 Name
.startswith("avx2.paddus.") || // Added in 8.0
138 Name
.startswith("avx2.psubus.") || // Added in 8.0
139 Name
.startswith("avx512.padds.") || // Added in 8.0
140 Name
.startswith("avx512.psubs.") || // Added in 8.0
141 Name
.startswith("avx512.mask.padds.") || // Added in 8.0
142 Name
.startswith("avx512.mask.psubs.") || // Added in 8.0
143 Name
.startswith("avx512.mask.paddus.") || // Added in 8.0
144 Name
.startswith("avx512.mask.psubus.") || // Added in 8.0
145 Name
=="ssse3.pabs.b.128" || // Added in 6.0
146 Name
=="ssse3.pabs.w.128" || // Added in 6.0
147 Name
=="ssse3.pabs.d.128" || // Added in 6.0
148 Name
.startswith("fma4.vfmadd.s") || // Added in 7.0
149 Name
.startswith("fma.vfmadd.") || // Added in 7.0
150 Name
.startswith("fma.vfmsub.") || // Added in 7.0
151 Name
.startswith("fma.vfmsubadd.") || // Added in 7.0
152 Name
.startswith("fma.vfnmadd.") || // Added in 7.0
153 Name
.startswith("fma.vfnmsub.") || // Added in 7.0
154 Name
.startswith("avx512.mask.vfmadd.") || // Added in 7.0
155 Name
.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
156 Name
.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
157 Name
.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
158 Name
.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
159 Name
.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
160 Name
.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
161 Name
.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
162 Name
.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
163 Name
.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
164 Name
.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
165 Name
.startswith("avx512.mask.shuf.i") || // Added in 6.0
166 Name
.startswith("avx512.mask.shuf.f") || // Added in 6.0
167 Name
.startswith("avx512.kunpck") || //added in 6.0
168 Name
.startswith("avx2.pabs.") || // Added in 6.0
169 Name
.startswith("avx512.mask.pabs.") || // Added in 6.0
170 Name
.startswith("avx512.broadcastm") || // Added in 6.0
171 Name
== "sse.sqrt.ss" || // Added in 7.0
172 Name
== "sse2.sqrt.sd" || // Added in 7.0
173 Name
.startswith("avx512.mask.sqrt.p") || // Added in 7.0
174 Name
.startswith("avx.sqrt.p") || // Added in 7.0
175 Name
.startswith("sse2.sqrt.p") || // Added in 7.0
176 Name
.startswith("sse.sqrt.p") || // Added in 7.0
177 Name
.startswith("avx512.mask.pbroadcast") || // Added in 6.0
178 Name
.startswith("sse2.pcmpeq.") || // Added in 3.1
179 Name
.startswith("sse2.pcmpgt.") || // Added in 3.1
180 Name
.startswith("avx2.pcmpeq.") || // Added in 3.1
181 Name
.startswith("avx2.pcmpgt.") || // Added in 3.1
182 Name
.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
183 Name
.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
184 Name
.startswith("avx.vperm2f128.") || // Added in 6.0
185 Name
== "avx2.vperm2i128" || // Added in 6.0
186 Name
== "sse.add.ss" || // Added in 4.0
187 Name
== "sse2.add.sd" || // Added in 4.0
188 Name
== "sse.sub.ss" || // Added in 4.0
189 Name
== "sse2.sub.sd" || // Added in 4.0
190 Name
== "sse.mul.ss" || // Added in 4.0
191 Name
== "sse2.mul.sd" || // Added in 4.0
192 Name
== "sse.div.ss" || // Added in 4.0
193 Name
== "sse2.div.sd" || // Added in 4.0
194 Name
== "sse41.pmaxsb" || // Added in 3.9
195 Name
== "sse2.pmaxs.w" || // Added in 3.9
196 Name
== "sse41.pmaxsd" || // Added in 3.9
197 Name
== "sse2.pmaxu.b" || // Added in 3.9
198 Name
== "sse41.pmaxuw" || // Added in 3.9
199 Name
== "sse41.pmaxud" || // Added in 3.9
200 Name
== "sse41.pminsb" || // Added in 3.9
201 Name
== "sse2.pmins.w" || // Added in 3.9
202 Name
== "sse41.pminsd" || // Added in 3.9
203 Name
== "sse2.pminu.b" || // Added in 3.9
204 Name
== "sse41.pminuw" || // Added in 3.9
205 Name
== "sse41.pminud" || // Added in 3.9
206 Name
== "avx512.kand.w" || // Added in 7.0
207 Name
== "avx512.kandn.w" || // Added in 7.0
208 Name
== "avx512.knot.w" || // Added in 7.0
209 Name
== "avx512.kor.w" || // Added in 7.0
210 Name
== "avx512.kxor.w" || // Added in 7.0
211 Name
== "avx512.kxnor.w" || // Added in 7.0
212 Name
== "avx512.kortestc.w" || // Added in 7.0
213 Name
== "avx512.kortestz.w" || // Added in 7.0
214 Name
.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
215 Name
.startswith("avx2.pmax") || // Added in 3.9
216 Name
.startswith("avx2.pmin") || // Added in 3.9
217 Name
.startswith("avx512.mask.pmax") || // Added in 4.0
218 Name
.startswith("avx512.mask.pmin") || // Added in 4.0
219 Name
.startswith("avx2.vbroadcast") || // Added in 3.8
220 Name
.startswith("avx2.pbroadcast") || // Added in 3.8
221 Name
.startswith("avx.vpermil.") || // Added in 3.1
222 Name
.startswith("sse2.pshuf") || // Added in 3.9
223 Name
.startswith("avx512.pbroadcast") || // Added in 3.9
224 Name
.startswith("avx512.mask.broadcast.s") || // Added in 3.9
225 Name
.startswith("avx512.mask.movddup") || // Added in 3.9
226 Name
.startswith("avx512.mask.movshdup") || // Added in 3.9
227 Name
.startswith("avx512.mask.movsldup") || // Added in 3.9
228 Name
.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
229 Name
.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
230 Name
.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
231 Name
.startswith("avx512.mask.shuf.p") || // Added in 4.0
232 Name
.startswith("avx512.mask.vpermil.p") || // Added in 3.9
233 Name
.startswith("avx512.mask.perm.df.") || // Added in 3.9
234 Name
.startswith("avx512.mask.perm.di.") || // Added in 3.9
235 Name
.startswith("avx512.mask.punpckl") || // Added in 3.9
236 Name
.startswith("avx512.mask.punpckh") || // Added in 3.9
237 Name
.startswith("avx512.mask.unpckl.") || // Added in 3.9
238 Name
.startswith("avx512.mask.unpckh.") || // Added in 3.9
239 Name
.startswith("avx512.mask.pand.") || // Added in 3.9
240 Name
.startswith("avx512.mask.pandn.") || // Added in 3.9
241 Name
.startswith("avx512.mask.por.") || // Added in 3.9
242 Name
.startswith("avx512.mask.pxor.") || // Added in 3.9
243 Name
.startswith("avx512.mask.and.") || // Added in 3.9
244 Name
.startswith("avx512.mask.andn.") || // Added in 3.9
245 Name
.startswith("avx512.mask.or.") || // Added in 3.9
246 Name
.startswith("avx512.mask.xor.") || // Added in 3.9
247 Name
.startswith("avx512.mask.padd.") || // Added in 4.0
248 Name
.startswith("avx512.mask.psub.") || // Added in 4.0
249 Name
.startswith("avx512.mask.pmull.") || // Added in 4.0
250 Name
.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
251 Name
.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
252 Name
.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
253 Name
.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
254 Name
.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
255 Name
.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
256 Name
== "avx512.mask.vcvtph2ps.128" || // Added in 11.0
257 Name
== "avx512.mask.vcvtph2ps.256" || // Added in 11.0
258 Name
== "avx512.mask.cvtqq2ps.256" || // Added in 9.0
259 Name
== "avx512.mask.cvtqq2ps.512" || // Added in 9.0
260 Name
== "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
261 Name
== "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
262 Name
== "avx512.mask.cvtpd2dq.256" || // Added in 7.0
263 Name
== "avx512.mask.cvtpd2ps.256" || // Added in 7.0
264 Name
== "avx512.mask.cvttpd2dq.256" || // Added in 7.0
265 Name
== "avx512.mask.cvttps2dq.128" || // Added in 7.0
266 Name
== "avx512.mask.cvttps2dq.256" || // Added in 7.0
267 Name
== "avx512.mask.cvtps2pd.128" || // Added in 7.0
268 Name
== "avx512.mask.cvtps2pd.256" || // Added in 7.0
269 Name
== "avx512.cvtusi2sd" || // Added in 7.0
270 Name
.startswith("avx512.mask.permvar.") || // Added in 7.0
271 Name
== "sse2.pmulu.dq" || // Added in 7.0
272 Name
== "sse41.pmuldq" || // Added in 7.0
273 Name
== "avx2.pmulu.dq" || // Added in 7.0
274 Name
== "avx2.pmul.dq" || // Added in 7.0
275 Name
== "avx512.pmulu.dq.512" || // Added in 7.0
276 Name
== "avx512.pmul.dq.512" || // Added in 7.0
277 Name
.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
278 Name
.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
279 Name
.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
280 Name
.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
281 Name
.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
282 Name
.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
283 Name
.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
284 Name
.startswith("avx512.mask.packsswb.") || // Added in 5.0
285 Name
.startswith("avx512.mask.packssdw.") || // Added in 5.0
286 Name
.startswith("avx512.mask.packuswb.") || // Added in 5.0
287 Name
.startswith("avx512.mask.packusdw.") || // Added in 5.0
288 Name
.startswith("avx512.mask.cmp.b") || // Added in 5.0
289 Name
.startswith("avx512.mask.cmp.d") || // Added in 5.0
290 Name
.startswith("avx512.mask.cmp.q") || // Added in 5.0
291 Name
.startswith("avx512.mask.cmp.w") || // Added in 5.0
292 Name
.startswith("avx512.cmp.p") || // Added in 12.0
293 Name
.startswith("avx512.mask.ucmp.") || // Added in 5.0
294 Name
.startswith("avx512.cvtb2mask.") || // Added in 7.0
295 Name
.startswith("avx512.cvtw2mask.") || // Added in 7.0
296 Name
.startswith("avx512.cvtd2mask.") || // Added in 7.0
297 Name
.startswith("avx512.cvtq2mask.") || // Added in 7.0
298 Name
.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
299 Name
.startswith("avx512.mask.psll.d") || // Added in 4.0
300 Name
.startswith("avx512.mask.psll.q") || // Added in 4.0
301 Name
.startswith("avx512.mask.psll.w") || // Added in 4.0
302 Name
.startswith("avx512.mask.psra.d") || // Added in 4.0
303 Name
.startswith("avx512.mask.psra.q") || // Added in 4.0
304 Name
.startswith("avx512.mask.psra.w") || // Added in 4.0
305 Name
.startswith("avx512.mask.psrl.d") || // Added in 4.0
306 Name
.startswith("avx512.mask.psrl.q") || // Added in 4.0
307 Name
.startswith("avx512.mask.psrl.w") || // Added in 4.0
308 Name
.startswith("avx512.mask.pslli") || // Added in 4.0
309 Name
.startswith("avx512.mask.psrai") || // Added in 4.0
310 Name
.startswith("avx512.mask.psrli") || // Added in 4.0
311 Name
.startswith("avx512.mask.psllv") || // Added in 4.0
312 Name
.startswith("avx512.mask.psrav") || // Added in 4.0
313 Name
.startswith("avx512.mask.psrlv") || // Added in 4.0
314 Name
.startswith("sse41.pmovsx") || // Added in 3.8
315 Name
.startswith("sse41.pmovzx") || // Added in 3.9
316 Name
.startswith("avx2.pmovsx") || // Added in 3.9
317 Name
.startswith("avx2.pmovzx") || // Added in 3.9
318 Name
.startswith("avx512.mask.pmovsx") || // Added in 4.0
319 Name
.startswith("avx512.mask.pmovzx") || // Added in 4.0
320 Name
.startswith("avx512.mask.lzcnt.") || // Added in 5.0
321 Name
.startswith("avx512.mask.pternlog.") || // Added in 7.0
322 Name
.startswith("avx512.maskz.pternlog.") || // Added in 7.0
323 Name
.startswith("avx512.mask.vpmadd52") || // Added in 7.0
324 Name
.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
325 Name
.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
326 Name
.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
327 Name
.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
328 Name
.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
329 Name
.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
330 Name
.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
331 Name
.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
332 Name
.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
333 Name
.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
334 Name
.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
335 Name
.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
336 Name
.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
337 Name
.startswith("avx512.mask.vpshld.") || // Added in 7.0
338 Name
.startswith("avx512.mask.vpshrd.") || // Added in 7.0
339 Name
.startswith("avx512.mask.vpshldv.") || // Added in 8.0
340 Name
.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
341 Name
.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
342 Name
.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
343 Name
.startswith("avx512.vpshld.") || // Added in 8.0
344 Name
.startswith("avx512.vpshrd.") || // Added in 8.0
345 Name
.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
346 Name
.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
347 Name
.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
348 Name
.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
349 Name
.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
350 Name
.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
351 Name
.startswith("avx512.mask.fpclass.p") || // Added in 7.0
352 Name
.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
353 Name
.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
354 Name
.startswith("avx512.mask.conflict.") || // Added in 9.0
355 Name
== "avx512.mask.pmov.qd.256" || // Added in 9.0
356 Name
== "avx512.mask.pmov.qd.512" || // Added in 9.0
357 Name
== "avx512.mask.pmov.wb.256" || // Added in 9.0
358 Name
== "avx512.mask.pmov.wb.512" || // Added in 9.0
359 Name
== "sse.cvtsi2ss" || // Added in 7.0
360 Name
== "sse.cvtsi642ss" || // Added in 7.0
361 Name
== "sse2.cvtsi2sd" || // Added in 7.0
362 Name
== "sse2.cvtsi642sd" || // Added in 7.0
363 Name
== "sse2.cvtss2sd" || // Added in 7.0
364 Name
== "sse2.cvtdq2pd" || // Added in 3.9
365 Name
== "sse2.cvtdq2ps" || // Added in 7.0
366 Name
== "sse2.cvtps2pd" || // Added in 3.9
367 Name
== "avx.cvtdq2.pd.256" || // Added in 3.9
368 Name
== "avx.cvtdq2.ps.256" || // Added in 7.0
369 Name
== "avx.cvt.ps2.pd.256" || // Added in 3.9
370 Name
.startswith("vcvtph2ps.") || // Added in 11.0
371 Name
.startswith("avx.vinsertf128.") || // Added in 3.7
372 Name
== "avx2.vinserti128" || // Added in 3.7
373 Name
.startswith("avx512.mask.insert") || // Added in 4.0
374 Name
.startswith("avx.vextractf128.") || // Added in 3.7
375 Name
== "avx2.vextracti128" || // Added in 3.7
376 Name
.startswith("avx512.mask.vextract") || // Added in 4.0
377 Name
.startswith("sse4a.movnt.") || // Added in 3.9
378 Name
.startswith("avx.movnt.") || // Added in 3.2
379 Name
.startswith("avx512.storent.") || // Added in 3.9
380 Name
== "sse41.movntdqa" || // Added in 5.0
381 Name
== "avx2.movntdqa" || // Added in 5.0
382 Name
== "avx512.movntdqa" || // Added in 5.0
383 Name
== "sse2.storel.dq" || // Added in 3.9
384 Name
.startswith("sse.storeu.") || // Added in 3.9
385 Name
.startswith("sse2.storeu.") || // Added in 3.9
386 Name
.startswith("avx.storeu.") || // Added in 3.9
387 Name
.startswith("avx512.mask.storeu.") || // Added in 3.9
388 Name
.startswith("avx512.mask.store.p") || // Added in 3.9
389 Name
.startswith("avx512.mask.store.b.") || // Added in 3.9
390 Name
.startswith("avx512.mask.store.w.") || // Added in 3.9
391 Name
.startswith("avx512.mask.store.d.") || // Added in 3.9
392 Name
.startswith("avx512.mask.store.q.") || // Added in 3.9
393 Name
== "avx512.mask.store.ss" || // Added in 7.0
394 Name
.startswith("avx512.mask.loadu.") || // Added in 3.9
395 Name
.startswith("avx512.mask.load.") || // Added in 3.9
396 Name
.startswith("avx512.mask.expand.load.") || // Added in 7.0
397 Name
.startswith("avx512.mask.compress.store.") || // Added in 7.0
398 Name
.startswith("avx512.mask.expand.b") || // Added in 9.0
399 Name
.startswith("avx512.mask.expand.w") || // Added in 9.0
400 Name
.startswith("avx512.mask.expand.d") || // Added in 9.0
401 Name
.startswith("avx512.mask.expand.q") || // Added in 9.0
402 Name
.startswith("avx512.mask.expand.p") || // Added in 9.0
403 Name
.startswith("avx512.mask.compress.b") || // Added in 9.0
404 Name
.startswith("avx512.mask.compress.w") || // Added in 9.0
405 Name
.startswith("avx512.mask.compress.d") || // Added in 9.0
406 Name
.startswith("avx512.mask.compress.q") || // Added in 9.0
407 Name
.startswith("avx512.mask.compress.p") || // Added in 9.0
408 Name
== "sse42.crc32.64.8" || // Added in 3.4
409 Name
.startswith("avx.vbroadcast.s") || // Added in 3.5
410 Name
.startswith("avx512.vbroadcast.s") || // Added in 7.0
411 Name
.startswith("avx512.mask.palignr.") || // Added in 3.9
412 Name
.startswith("avx512.mask.valign.") || // Added in 4.0
413 Name
.startswith("sse2.psll.dq") || // Added in 3.7
414 Name
.startswith("sse2.psrl.dq") || // Added in 3.7
415 Name
.startswith("avx2.psll.dq") || // Added in 3.7
416 Name
.startswith("avx2.psrl.dq") || // Added in 3.7
417 Name
.startswith("avx512.psll.dq") || // Added in 3.9
418 Name
.startswith("avx512.psrl.dq") || // Added in 3.9
419 Name
== "sse41.pblendw" || // Added in 3.7
420 Name
.startswith("sse41.blendp") || // Added in 3.7
421 Name
.startswith("avx.blend.p") || // Added in 3.7
422 Name
== "avx2.pblendw" || // Added in 3.7
423 Name
.startswith("avx2.pblendd.") || // Added in 3.7
424 Name
.startswith("avx.vbroadcastf128") || // Added in 4.0
425 Name
== "avx2.vbroadcasti128" || // Added in 3.7
426 Name
.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
427 Name
.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
428 Name
.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
429 Name
.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
430 Name
.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
431 Name
.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
432 Name
.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
433 Name
.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
434 Name
== "xop.vpcmov" || // Added in 3.8
435 Name
== "xop.vpcmov.256" || // Added in 5.0
436 Name
.startswith("avx512.mask.move.s") || // Added in 4.0
437 Name
.startswith("avx512.cvtmask2") || // Added in 5.0
438 Name
.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
439 Name
.startswith("xop.vprot") || // Added in 8.0
440 Name
.startswith("avx512.prol") || // Added in 8.0
441 Name
.startswith("avx512.pror") || // Added in 8.0
442 Name
.startswith("avx512.mask.prorv.") || // Added in 8.0
443 Name
.startswith("avx512.mask.pror.") || // Added in 8.0
444 Name
.startswith("avx512.mask.prolv.") || // Added in 8.0
445 Name
.startswith("avx512.mask.prol.") || // Added in 8.0
446 Name
.startswith("avx512.ptestm") || //Added in 6.0
447 Name
.startswith("avx512.ptestnm") || //Added in 6.0
448 Name
.startswith("avx512.mask.pavg")) // Added in 6.0
454 static bool UpgradeX86IntrinsicFunction(Function
*F
, StringRef Name
,
456 // Only handle intrinsics that start with "x86.".
457 if (!Name
.startswith("x86."))
459 // Remove "x86." prefix.
460 Name
= Name
.substr(4);
462 if (ShouldUpgradeX86Intrinsic(F
, Name
)) {
467 if (Name
== "rdtscp") { // Added in 8.0
468 // If this intrinsic has 0 operands, it's the new version.
469 if (F
->getFunctionType()->getNumParams() == 0)
473 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
474 Intrinsic::x86_rdtscp
);
478 // SSE4.1 ptest functions may have an old signature.
479 if (Name
.startswith("sse41.ptest")) { // Added in 3.2
480 if (Name
.substr(11) == "c")
481 return UpgradePTESTIntrinsic(F
, Intrinsic::x86_sse41_ptestc
, NewFn
);
482 if (Name
.substr(11) == "z")
483 return UpgradePTESTIntrinsic(F
, Intrinsic::x86_sse41_ptestz
, NewFn
);
484 if (Name
.substr(11) == "nzc")
485 return UpgradePTESTIntrinsic(F
, Intrinsic::x86_sse41_ptestnzc
, NewFn
);
487 // Several blend and other instructions with masks used the wrong number of
489 if (Name
== "sse41.insertps") // Added in 3.6
490 return UpgradeX86IntrinsicsWith8BitMask(F
, Intrinsic::x86_sse41_insertps
,
492 if (Name
== "sse41.dppd") // Added in 3.6
493 return UpgradeX86IntrinsicsWith8BitMask(F
, Intrinsic::x86_sse41_dppd
,
495 if (Name
== "sse41.dpps") // Added in 3.6
496 return UpgradeX86IntrinsicsWith8BitMask(F
, Intrinsic::x86_sse41_dpps
,
498 if (Name
== "sse41.mpsadbw") // Added in 3.6
499 return UpgradeX86IntrinsicsWith8BitMask(F
, Intrinsic::x86_sse41_mpsadbw
,
501 if (Name
== "avx.dp.ps.256") // Added in 3.6
502 return UpgradeX86IntrinsicsWith8BitMask(F
, Intrinsic::x86_avx_dp_ps_256
,
504 if (Name
== "avx2.mpsadbw") // Added in 3.6
505 return UpgradeX86IntrinsicsWith8BitMask(F
, Intrinsic::x86_avx2_mpsadbw
,
507 if (Name
== "avx512.mask.cmp.pd.128") // Added in 7.0
508 return UpgradeX86MaskedFPCompare(F
, Intrinsic::x86_avx512_mask_cmp_pd_128
,
510 if (Name
== "avx512.mask.cmp.pd.256") // Added in 7.0
511 return UpgradeX86MaskedFPCompare(F
, Intrinsic::x86_avx512_mask_cmp_pd_256
,
513 if (Name
== "avx512.mask.cmp.pd.512") // Added in 7.0
514 return UpgradeX86MaskedFPCompare(F
, Intrinsic::x86_avx512_mask_cmp_pd_512
,
516 if (Name
== "avx512.mask.cmp.ps.128") // Added in 7.0
517 return UpgradeX86MaskedFPCompare(F
, Intrinsic::x86_avx512_mask_cmp_ps_128
,
519 if (Name
== "avx512.mask.cmp.ps.256") // Added in 7.0
520 return UpgradeX86MaskedFPCompare(F
, Intrinsic::x86_avx512_mask_cmp_ps_256
,
522 if (Name
== "avx512.mask.cmp.ps.512") // Added in 7.0
523 return UpgradeX86MaskedFPCompare(F
, Intrinsic::x86_avx512_mask_cmp_ps_512
,
525 if (Name
== "avx512bf16.cvtne2ps2bf16.128") // Added in 9.0
526 return UpgradeX86BF16Intrinsic(
527 F
, Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128
, NewFn
);
528 if (Name
== "avx512bf16.cvtne2ps2bf16.256") // Added in 9.0
529 return UpgradeX86BF16Intrinsic(
530 F
, Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256
, NewFn
);
531 if (Name
== "avx512bf16.cvtne2ps2bf16.512") // Added in 9.0
532 return UpgradeX86BF16Intrinsic(
533 F
, Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512
, NewFn
);
534 if (Name
== "avx512bf16.mask.cvtneps2bf16.128") // Added in 9.0
535 return UpgradeX86BF16Intrinsic(
536 F
, Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128
, NewFn
);
537 if (Name
== "avx512bf16.cvtneps2bf16.256") // Added in 9.0
538 return UpgradeX86BF16Intrinsic(
539 F
, Intrinsic::x86_avx512bf16_cvtneps2bf16_256
, NewFn
);
540 if (Name
== "avx512bf16.cvtneps2bf16.512") // Added in 9.0
541 return UpgradeX86BF16Intrinsic(
542 F
, Intrinsic::x86_avx512bf16_cvtneps2bf16_512
, NewFn
);
543 if (Name
== "avx512bf16.dpbf16ps.128") // Added in 9.0
544 return UpgradeX86BF16DPIntrinsic(
545 F
, Intrinsic::x86_avx512bf16_dpbf16ps_128
, NewFn
);
546 if (Name
== "avx512bf16.dpbf16ps.256") // Added in 9.0
547 return UpgradeX86BF16DPIntrinsic(
548 F
, Intrinsic::x86_avx512bf16_dpbf16ps_256
, NewFn
);
549 if (Name
== "avx512bf16.dpbf16ps.512") // Added in 9.0
550 return UpgradeX86BF16DPIntrinsic(
551 F
, Intrinsic::x86_avx512bf16_dpbf16ps_512
, NewFn
);
553 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
554 if (Name
.startswith("xop.vfrcz.ss") && F
->arg_size() == 2) {
556 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
557 Intrinsic::x86_xop_vfrcz_ss
);
560 if (Name
.startswith("xop.vfrcz.sd") && F
->arg_size() == 2) {
562 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
563 Intrinsic::x86_xop_vfrcz_sd
);
566 // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
567 if (Name
.startswith("xop.vpermil2")) { // Added in 3.9
568 auto Idx
= F
->getFunctionType()->getParamType(2);
569 if (Idx
->isFPOrFPVectorTy()) {
571 unsigned IdxSize
= Idx
->getPrimitiveSizeInBits();
572 unsigned EltSize
= Idx
->getScalarSizeInBits();
573 Intrinsic::ID Permil2ID
;
574 if (EltSize
== 64 && IdxSize
== 128)
575 Permil2ID
= Intrinsic::x86_xop_vpermil2pd
;
576 else if (EltSize
== 32 && IdxSize
== 128)
577 Permil2ID
= Intrinsic::x86_xop_vpermil2ps
;
578 else if (EltSize
== 64 && IdxSize
== 256)
579 Permil2ID
= Intrinsic::x86_xop_vpermil2pd_256
;
581 Permil2ID
= Intrinsic::x86_xop_vpermil2ps_256
;
582 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Permil2ID
);
587 if (Name
== "seh.recoverfp") {
588 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::eh_recoverfp
);
595 static Intrinsic::ID
ShouldUpgradeNVPTXBF16Intrinsic(StringRef Name
) {
596 if (Name
.consume_front("abs."))
597 return StringSwitch
<Intrinsic::ID
>(Name
)
598 .Case("bf16", Intrinsic::nvvm_abs_bf16
)
599 .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2
)
600 .Default(Intrinsic::not_intrinsic
);
602 if (Name
.consume_front("fma.rn."))
603 return StringSwitch
<Intrinsic::ID
>(Name
)
604 .Case("bf16", Intrinsic::nvvm_fma_rn_bf16
)
605 .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2
)
606 .Case("ftz_bf16", Intrinsic::nvvm_fma_rn_ftz_bf16
)
607 .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2
)
608 .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16
)
609 .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2
)
610 .Case("ftz_sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16
)
611 .Case("ftz_sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2
)
612 .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16
)
613 .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2
)
614 .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16
)
615 .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2
)
616 .Default(Intrinsic::not_intrinsic
);
618 if (Name
.consume_front("fmax."))
619 return StringSwitch
<Intrinsic::ID
>(Name
)
620 .Case("bf16", Intrinsic::nvvm_fmax_bf16
)
621 .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2
)
622 .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16
)
623 .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2
)
624 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16
)
625 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2
)
626 .Case("ftz.nan.xorsign.abs.bf16",
627 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16
)
628 .Case("ftz.nan.xorsign.abs.bf16x2",
629 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2
)
630 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16
)
631 .Case("ftz.xorsign.abs.bf16x2",
632 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2
)
633 .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16
)
634 .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2
)
635 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16
)
636 .Case("nan.xorsign.abs.bf16x2",
637 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2
)
638 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16
)
639 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2
)
640 .Default(Intrinsic::not_intrinsic
);
642 if (Name
.consume_front("fmin."))
643 return StringSwitch
<Intrinsic::ID
>(Name
)
644 .Case("bf16", Intrinsic::nvvm_fmin_bf16
)
645 .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2
)
646 .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16
)
647 .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2
)
648 .Case("ftz.nan_bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16
)
649 .Case("ftz.nan_bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2
)
650 .Case("ftz.nan.xorsign.abs.bf16",
651 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16
)
652 .Case("ftz.nan.xorsign.abs.bf16x2",
653 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2
)
654 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16
)
655 .Case("ftz.xorsign.abs.bf16x2",
656 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2
)
657 .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16
)
658 .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2
)
659 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16
)
660 .Case("nan.xorsign.abs.bf16x2",
661 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2
)
662 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16
)
663 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2
)
664 .Default(Intrinsic::not_intrinsic
);
666 if (Name
.consume_front("neg."))
667 return StringSwitch
<Intrinsic::ID
>(Name
)
668 .Case("bf16", Intrinsic::nvvm_neg_bf16
)
669 .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2
)
670 .Default(Intrinsic::not_intrinsic
);
672 return Intrinsic::not_intrinsic
;
675 static bool UpgradeIntrinsicFunction1(Function
*F
, Function
*&NewFn
) {
676 assert(F
&& "Illegal to upgrade a non-existent Function.");
678 // Quickly eliminate it, if it's not a candidate.
679 StringRef Name
= F
->getName();
680 if (Name
.size() <= 7 || !Name
.startswith("llvm."))
682 Name
= Name
.substr(5); // Strip off "llvm."
687 if (Name
.startswith("arm.rbit") || Name
.startswith("aarch64.rbit")) {
688 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::bitreverse
,
689 F
->arg_begin()->getType());
692 if (Name
.startswith("aarch64.neon.frintn")) {
693 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::roundeven
,
694 F
->arg_begin()->getType());
697 if (Name
.startswith("aarch64.neon.rbit")) {
698 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::bitreverse
,
699 F
->arg_begin()->getType());
702 if (Name
== "aarch64.sve.bfdot.lane") {
703 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
704 Intrinsic::aarch64_sve_bfdot_lane_v2
);
707 if (Name
== "aarch64.sve.bfmlalb.lane") {
708 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
709 Intrinsic::aarch64_sve_bfmlalb_lane_v2
);
712 if (Name
== "aarch64.sve.bfmlalt.lane") {
713 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
714 Intrinsic::aarch64_sve_bfmlalt_lane_v2
);
717 static const Regex
LdRegex("^aarch64\\.sve\\.ld[234](.nxv[a-z0-9]+|$)");
718 if (LdRegex
.match(Name
)) {
720 dyn_cast
<VectorType
>(F
->getReturnType())->getElementType();
722 dyn_cast
<VectorType
>(F
->arg_begin()->getType())->getElementCount();
723 Type
*Ty
= VectorType::get(ScalarTy
, EC
);
725 StringSwitch
<Intrinsic::ID
>(Name
)
726 .StartsWith("aarch64.sve.ld2", Intrinsic::aarch64_sve_ld2_sret
)
727 .StartsWith("aarch64.sve.ld3", Intrinsic::aarch64_sve_ld3_sret
)
728 .StartsWith("aarch64.sve.ld4", Intrinsic::aarch64_sve_ld4_sret
)
729 .Default(Intrinsic::not_intrinsic
);
730 NewFn
= Intrinsic::getDeclaration(F
->getParent(), ID
, Ty
);
733 if (Name
.startswith("aarch64.sve.tuple.get")) {
734 Type
*Tys
[] = {F
->getReturnType(), F
->arg_begin()->getType()};
735 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
736 Intrinsic::vector_extract
, Tys
);
739 if (Name
.startswith("aarch64.sve.tuple.set")) {
740 auto Args
= F
->getFunctionType()->params();
741 Type
*Tys
[] = {Args
[0], Args
[2], Args
[1]};
742 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
743 Intrinsic::vector_insert
, Tys
);
746 static const Regex
CreateTupleRegex(
747 "^aarch64\\.sve\\.tuple\\.create[234](.nxv[a-z0-9]+|$)");
748 if (CreateTupleRegex
.match(Name
)) {
749 auto Args
= F
->getFunctionType()->params();
750 Type
*Tys
[] = {F
->getReturnType(), Args
[1]};
751 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
752 Intrinsic::vector_insert
, Tys
);
755 if (Name
.startswith("arm.neon.vclz")) {
757 F
->arg_begin()->getType(),
758 Type::getInt1Ty(F
->getContext())
760 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
761 // the end of the name. Change name from llvm.arm.neon.vclz.* to
763 FunctionType
* fType
= FunctionType::get(F
->getReturnType(), args
, false);
764 NewFn
= Function::Create(fType
, F
->getLinkage(), F
->getAddressSpace(),
765 "llvm.ctlz." + Name
.substr(14), F
->getParent());
768 if (Name
.startswith("arm.neon.vcnt")) {
769 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::ctpop
,
770 F
->arg_begin()->getType());
773 static const Regex
vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
774 if (vstRegex
.match(Name
)) {
775 static const Intrinsic::ID StoreInts
[] = {Intrinsic::arm_neon_vst1
,
776 Intrinsic::arm_neon_vst2
,
777 Intrinsic::arm_neon_vst3
,
778 Intrinsic::arm_neon_vst4
};
780 static const Intrinsic::ID StoreLaneInts
[] = {
781 Intrinsic::arm_neon_vst2lane
, Intrinsic::arm_neon_vst3lane
,
782 Intrinsic::arm_neon_vst4lane
785 auto fArgs
= F
->getFunctionType()->params();
786 Type
*Tys
[] = {fArgs
[0], fArgs
[1]};
787 if (!Name
.contains("lane"))
788 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
789 StoreInts
[fArgs
.size() - 3], Tys
);
791 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
792 StoreLaneInts
[fArgs
.size() - 5], Tys
);
795 if (Name
== "aarch64.thread.pointer" || Name
== "arm.thread.pointer") {
796 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::thread_pointer
);
799 if (Name
.startswith("arm.neon.vqadds.")) {
800 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::sadd_sat
,
801 F
->arg_begin()->getType());
804 if (Name
.startswith("arm.neon.vqaddu.")) {
805 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::uadd_sat
,
806 F
->arg_begin()->getType());
809 if (Name
.startswith("arm.neon.vqsubs.")) {
810 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::ssub_sat
,
811 F
->arg_begin()->getType());
814 if (Name
.startswith("arm.neon.vqsubu.")) {
815 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::usub_sat
,
816 F
->arg_begin()->getType());
819 if (Name
.startswith("aarch64.neon.addp")) {
820 if (F
->arg_size() != 2)
821 break; // Invalid IR.
822 VectorType
*Ty
= dyn_cast
<VectorType
>(F
->getReturnType());
823 if (Ty
&& Ty
->getElementType()->isFloatingPointTy()) {
824 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
825 Intrinsic::aarch64_neon_faddp
, Ty
);
830 // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and v16i8
832 if ((Name
.startswith("arm.neon.bfdot.") ||
833 Name
.startswith("aarch64.neon.bfdot.")) &&
834 Name
.endswith("i8")) {
836 StringSwitch
<Intrinsic::ID
>(Name
)
837 .Cases("arm.neon.bfdot.v2f32.v8i8",
838 "arm.neon.bfdot.v4f32.v16i8",
839 Intrinsic::arm_neon_bfdot
)
840 .Cases("aarch64.neon.bfdot.v2f32.v8i8",
841 "aarch64.neon.bfdot.v4f32.v16i8",
842 Intrinsic::aarch64_neon_bfdot
)
843 .Default(Intrinsic::not_intrinsic
);
844 if (IID
== Intrinsic::not_intrinsic
)
847 size_t OperandWidth
= F
->getReturnType()->getPrimitiveSizeInBits();
848 assert((OperandWidth
== 64 || OperandWidth
== 128) &&
849 "Unexpected operand width");
850 LLVMContext
&Ctx
= F
->getParent()->getContext();
851 std::array
<Type
*, 2> Tys
{{
853 FixedVectorType::get(Type::getBFloatTy(Ctx
), OperandWidth
/ 16)
855 NewFn
= Intrinsic::getDeclaration(F
->getParent(), IID
, Tys
);
859 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore
860 // and accept v8bf16 instead of v16i8
861 if ((Name
.startswith("arm.neon.bfm") ||
862 Name
.startswith("aarch64.neon.bfm")) &&
863 Name
.endswith(".v4f32.v16i8")) {
865 StringSwitch
<Intrinsic::ID
>(Name
)
866 .Case("arm.neon.bfmmla.v4f32.v16i8",
867 Intrinsic::arm_neon_bfmmla
)
868 .Case("arm.neon.bfmlalb.v4f32.v16i8",
869 Intrinsic::arm_neon_bfmlalb
)
870 .Case("arm.neon.bfmlalt.v4f32.v16i8",
871 Intrinsic::arm_neon_bfmlalt
)
872 .Case("aarch64.neon.bfmmla.v4f32.v16i8",
873 Intrinsic::aarch64_neon_bfmmla
)
874 .Case("aarch64.neon.bfmlalb.v4f32.v16i8",
875 Intrinsic::aarch64_neon_bfmlalb
)
876 .Case("aarch64.neon.bfmlalt.v4f32.v16i8",
877 Intrinsic::aarch64_neon_bfmlalt
)
878 .Default(Intrinsic::not_intrinsic
);
879 if (IID
== Intrinsic::not_intrinsic
)
882 std::array
<Type
*, 0> Tys
;
883 NewFn
= Intrinsic::getDeclaration(F
->getParent(), IID
, Tys
);
887 if (Name
== "arm.mve.vctp64" &&
888 cast
<FixedVectorType
>(F
->getReturnType())->getNumElements() == 4) {
889 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename the
890 // function and deal with it below in UpgradeIntrinsicCall.
894 // These too are changed to accept a v2i1 insteead of the old v4i1.
895 if (Name
== "arm.mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
896 Name
== "arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
897 Name
== "arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
898 Name
== "arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
900 "arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
901 Name
== "arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
902 Name
== "arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
903 Name
== "arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
905 "arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
906 Name
== "arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
907 Name
== "arm.cde.vcx1q.predicated.v2i64.v4i1" ||
908 Name
== "arm.cde.vcx1qa.predicated.v2i64.v4i1" ||
909 Name
== "arm.cde.vcx2q.predicated.v2i64.v4i1" ||
910 Name
== "arm.cde.vcx2qa.predicated.v2i64.v4i1" ||
911 Name
== "arm.cde.vcx3q.predicated.v2i64.v4i1" ||
912 Name
== "arm.cde.vcx3qa.predicated.v2i64.v4i1")
915 if (Name
.consume_front("amdgcn.")) {
916 if (Name
== "alignbit") {
917 // Target specific intrinsic became redundant
918 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::fshr
,
919 {F
->getReturnType()});
923 if (Name
.startswith("atomic.inc") || Name
.startswith("atomic.dec")) {
924 // This was replaced with atomicrmw uinc_wrap and udec_wrap, so there's no
930 if (Name
.startswith("ldexp.")) {
931 // Target specific intrinsic became redundant
932 NewFn
= Intrinsic::getDeclaration(
933 F
->getParent(), Intrinsic::ldexp
,
934 {F
->getReturnType(), F
->getArg(1)->getType()});
942 if (Name
.startswith("ctlz.") && F
->arg_size() == 1) {
944 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::ctlz
,
945 F
->arg_begin()->getType());
948 if (Name
.startswith("cttz.") && F
->arg_size() == 1) {
950 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::cttz
,
951 F
->arg_begin()->getType());
954 if (Name
.equals("coro.end") && F
->arg_size() == 2) {
956 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::coro_end
);
963 if (Name
.consume_front("dbg.")) {
964 if (Name
== "addr" || (Name
== "value" && F
->arg_size() == 4)) {
966 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::dbg_value
);
969 break; // No other 'dbg.*'.
973 if (Name
.consume_front("experimental.vector.")) {
974 Intrinsic::ID ID
= StringSwitch
<Intrinsic::ID
>(Name
)
975 .StartsWith("extract.", Intrinsic::vector_extract
)
976 .StartsWith("insert.", Intrinsic::vector_insert
)
977 .Default(Intrinsic::not_intrinsic
);
978 if (ID
!= Intrinsic::not_intrinsic
) {
979 const auto *FT
= F
->getFunctionType();
980 SmallVector
<Type
*, 2> Tys
;
981 if (ID
== Intrinsic::vector_extract
)
982 // Extracting overloads the return type.
983 Tys
.push_back(FT
->getReturnType());
984 Tys
.push_back(FT
->getParamType(0));
985 if (ID
== Intrinsic::vector_insert
)
986 // Inserting overloads the inserted type.
987 Tys
.push_back(FT
->getParamType(1));
989 NewFn
= Intrinsic::getDeclaration(F
->getParent(), ID
, Tys
);
993 if (Name
.consume_front("reduce.")) {
994 SmallVector
<StringRef
, 2> Groups
;
995 static const Regex
R("^([a-z]+)\\.[a-z][0-9]+");
996 if (R
.match(Name
, &Groups
))
997 ID
= StringSwitch
<Intrinsic::ID
>(Groups
[1])
998 .Case("add", Intrinsic::vector_reduce_add
)
999 .Case("mul", Intrinsic::vector_reduce_mul
)
1000 .Case("and", Intrinsic::vector_reduce_and
)
1001 .Case("or", Intrinsic::vector_reduce_or
)
1002 .Case("xor", Intrinsic::vector_reduce_xor
)
1003 .Case("smax", Intrinsic::vector_reduce_smax
)
1004 .Case("smin", Intrinsic::vector_reduce_smin
)
1005 .Case("umax", Intrinsic::vector_reduce_umax
)
1006 .Case("umin", Intrinsic::vector_reduce_umin
)
1007 .Case("fmax", Intrinsic::vector_reduce_fmax
)
1008 .Case("fmin", Intrinsic::vector_reduce_fmin
)
1009 .Default(Intrinsic::not_intrinsic
);
1012 if (ID
== Intrinsic::not_intrinsic
) {
1013 static const Regex
R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1016 if (R2
.match(Name
, &Groups
))
1017 ID
= StringSwitch
<Intrinsic::ID
>(Groups
[1])
1018 .Case("fadd", Intrinsic::vector_reduce_fadd
)
1019 .Case("fmul", Intrinsic::vector_reduce_fmul
)
1020 .Default(Intrinsic::not_intrinsic
);
1022 if (ID
!= Intrinsic::not_intrinsic
) {
1024 auto Args
= F
->getFunctionType()->params();
1026 Intrinsic::getDeclaration(F
->getParent(), ID
, {Args
[V2
? 1 : 0]});
1029 break; // No other 'expermental.vector.reduce.*'.
1031 break; // No other 'experimental.vector.*'.
1033 break; // No other 'e*'.
1035 if (Name
.startswith("flt.rounds")) {
1037 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::get_rounding
);
1042 if (Name
.startswith("invariant.group.barrier")) {
1043 // Rename invariant.group.barrier to launder.invariant.group
1044 auto Args
= F
->getFunctionType()->params();
1045 Type
* ObjectPtr
[1] = {Args
[0]};
1047 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
1048 Intrinsic::launder_invariant_group
, ObjectPtr
);
1053 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1054 // alignment parameter to embedding the alignment as an attribute of
1055 // the pointer args.
1056 if (unsigned ID
= StringSwitch
<unsigned>(Name
)
1057 .StartsWith("memcpy.", Intrinsic::memcpy
)
1058 .StartsWith("memmove.", Intrinsic::memmove
)
1060 if (F
->arg_size() == 5) {
1062 // Get the types of dest, src, and len
1063 ArrayRef
<Type
*> ParamTypes
=
1064 F
->getFunctionType()->params().slice(0, 3);
1065 NewFn
= Intrinsic::getDeclaration(F
->getParent(), ID
, ParamTypes
);
1069 if (Name
.startswith("memset.") && F
->arg_size() == 5) {
1071 // Get the types of dest, and len
1072 const auto *FT
= F
->getFunctionType();
1073 Type
*ParamTypes
[2] = {
1074 FT
->getParamType(0), // Dest
1075 FT
->getParamType(2) // len
1077 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::memset
,
1084 if (Name
.consume_front("nvvm.")) {
1085 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1086 if (F
->arg_size() == 1) {
1088 StringSwitch
<Intrinsic::ID
>(Name
)
1089 .Cases("brev32", "brev64", Intrinsic::bitreverse
)
1090 .Case("clz.i", Intrinsic::ctlz
)
1091 .Case("popc.i", Intrinsic::ctpop
)
1092 .Default(Intrinsic::not_intrinsic
);
1093 if (IID
!= Intrinsic::not_intrinsic
) {
1094 NewFn
= Intrinsic::getDeclaration(F
->getParent(), IID
,
1095 {F
->getReturnType()});
1100 // Check for nvvm intrinsics that need a return type adjustment.
1101 if (!F
->getReturnType()->getScalarType()->isBFloatTy()) {
1102 Intrinsic::ID IID
= ShouldUpgradeNVPTXBF16Intrinsic(Name
);
1103 if (IID
!= Intrinsic::not_intrinsic
) {
1109 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1110 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1112 // TODO: We could add lohi.i2d.
1113 bool Expand
= false;
1114 if (Name
.consume_front("abs."))
1116 Expand
= Name
== "i" || Name
== "ll";
1117 else if (Name
== "clz.ll" || Name
== "popc.ll" || Name
== "h2f")
1119 else if (Name
.consume_front("max.") || Name
.consume_front("min."))
1120 // nvvm.{min,max}.{i,ii,ui,ull}
1121 Expand
= Name
== "i" || Name
== "ll" || Name
== "ui" || Name
== "ull";
1122 else if (Name
.consume_front("atomic.load.add."))
1123 // nvvm.atomic.load.add.{f32.p,f64.p}
1124 Expand
= Name
.startswith("f32.p") || Name
.startswith("f64.p");
1132 break; // No other 'nvvm.*'.
1137 // We only need to change the name to match the mangling including the
1139 if (Name
.startswith("objectsize.")) {
1140 Type
*Tys
[2] = { F
->getReturnType(), F
->arg_begin()->getType() };
1141 if (F
->arg_size() == 2 || F
->arg_size() == 3 ||
1143 Intrinsic::getName(Intrinsic::objectsize
, Tys
, F
->getParent())) {
1145 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::objectsize
,
1153 if (Name
.startswith("ptr.annotation.") && F
->arg_size() == 4) {
1155 NewFn
= Intrinsic::getDeclaration(
1156 F
->getParent(), Intrinsic::ptr_annotation
,
1157 {F
->arg_begin()->getType(), F
->getArg(1)->getType()});
1163 if (Name
.consume_front("riscv.")) {
1165 ID
= StringSwitch
<Intrinsic::ID
>(Name
)
1166 .Case("aes32dsi", Intrinsic::riscv_aes32dsi
)
1167 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi
)
1168 .Case("aes32esi", Intrinsic::riscv_aes32esi
)
1169 .Case("aes32esmi", Intrinsic::riscv_aes32esmi
)
1170 .Default(Intrinsic::not_intrinsic
);
1171 if (ID
!= Intrinsic::not_intrinsic
) {
1172 if (!F
->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1174 NewFn
= Intrinsic::getDeclaration(F
->getParent(), ID
);
1177 break; // No other applicable upgrades.
1180 ID
= StringSwitch
<Intrinsic::ID
>(Name
)
1181 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks
)
1182 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed
)
1183 .Default(Intrinsic::not_intrinsic
);
1184 if (ID
!= Intrinsic::not_intrinsic
) {
1185 if (!F
->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1186 F
->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1188 NewFn
= Intrinsic::getDeclaration(F
->getParent(), ID
);
1191 break; // No other applicable upgrades.
1194 ID
= StringSwitch
<Intrinsic::ID
>(Name
)
1195 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0
)
1196 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1
)
1197 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0
)
1198 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1
)
1199 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0
)
1200 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1
)
1201 .Default(Intrinsic::not_intrinsic
);
1202 if (ID
!= Intrinsic::not_intrinsic
) {
1203 if (F
->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1205 NewFn
= Intrinsic::getDeclaration(F
->getParent(), ID
);
1208 break; // No other applicable upgrades.
1210 break; // No other 'riscv.*' intrinsics
1215 if (Name
== "stackprotectorcheck") {
1222 if (Name
== "var.annotation" && F
->arg_size() == 4) {
1224 NewFn
= Intrinsic::getDeclaration(
1225 F
->getParent(), Intrinsic::var_annotation
,
1226 {{F
->arg_begin()->getType(), F
->getArg(1)->getType()}});
1233 if (Name
.consume_front("wasm.")) {
1235 StringSwitch
<Intrinsic::ID
>(Name
)
1236 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd
)
1237 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd
)
1238 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect
)
1239 .Default(Intrinsic::not_intrinsic
);
1240 if (ID
!= Intrinsic::not_intrinsic
) {
1243 Intrinsic::getDeclaration(F
->getParent(), ID
, F
->getReturnType());
1247 if (Name
.consume_front("dot.i8x16.i7x16.")) {
1248 ID
= StringSwitch
<Intrinsic::ID
>(Name
)
1249 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed
)
1251 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed
)
1252 .Default(Intrinsic::not_intrinsic
);
1253 if (ID
!= Intrinsic::not_intrinsic
) {
1255 NewFn
= Intrinsic::getDeclaration(F
->getParent(), ID
);
1258 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1260 break; // No other 'wasm.*'.
1265 if (UpgradeX86IntrinsicFunction(F
, Name
, NewFn
))
1269 auto *ST
= dyn_cast
<StructType
>(F
->getReturnType());
1270 if (ST
&& (!ST
->isLiteral() || ST
->isPacked())) {
1271 // Replace return type with literal non-packed struct. Only do this for
1272 // intrinsics declared to return a struct, not for intrinsics with
1273 // overloaded return type, in which case the exact struct type will be
1274 // mangled into the name.
1275 SmallVector
<Intrinsic::IITDescriptor
> Desc
;
1276 Intrinsic::getIntrinsicInfoTableEntries(F
->getIntrinsicID(), Desc
);
1277 if (Desc
.front().Kind
== Intrinsic::IITDescriptor::Struct
) {
1278 auto *FT
= F
->getFunctionType();
1279 auto *NewST
= StructType::get(ST
->getContext(), ST
->elements());
1280 auto *NewFT
= FunctionType::get(NewST
, FT
->params(), FT
->isVarArg());
1281 std::string Name
= F
->getName().str();
1283 NewFn
= Function::Create(NewFT
, F
->getLinkage(), F
->getAddressSpace(),
1284 Name
, F
->getParent());
1286 // The new function may also need remangling.
1287 if (auto Result
= llvm::Intrinsic::remangleIntrinsicFunction(NewFn
))
1293 // Remangle our intrinsic since we upgrade the mangling
1294 auto Result
= llvm::Intrinsic::remangleIntrinsicFunction(F
);
1295 if (Result
!= std::nullopt
) {
1300 // This may not belong here. This function is effectively being overloaded
1301 // to both detect an intrinsic which needs upgrading, and to provide the
1302 // upgraded form of the intrinsic. We should perhaps have two separate
1303 // functions for this.
1307 bool llvm::UpgradeIntrinsicFunction(Function
*F
, Function
*&NewFn
) {
1309 bool Upgraded
= UpgradeIntrinsicFunction1(F
, NewFn
);
1310 assert(F
!= NewFn
&& "Intrinsic function upgraded to the same function");
1312 // Upgrade intrinsic attributes. This does not change the function.
1315 if (Intrinsic::ID id
= F
->getIntrinsicID())
1316 F
->setAttributes(Intrinsic::getAttributes(F
->getContext(), id
));
1320 GlobalVariable
*llvm::UpgradeGlobalVariable(GlobalVariable
*GV
) {
1321 if (!(GV
->hasName() && (GV
->getName() == "llvm.global_ctors" ||
1322 GV
->getName() == "llvm.global_dtors")) ||
1323 !GV
->hasInitializer())
1325 ArrayType
*ATy
= dyn_cast
<ArrayType
>(GV
->getValueType());
1328 StructType
*STy
= dyn_cast
<StructType
>(ATy
->getElementType());
1329 if (!STy
|| STy
->getNumElements() != 2)
1332 LLVMContext
&C
= GV
->getContext();
1334 auto EltTy
= StructType::get(STy
->getElementType(0), STy
->getElementType(1),
1336 Constant
*Init
= GV
->getInitializer();
1337 unsigned N
= Init
->getNumOperands();
1338 std::vector
<Constant
*> NewCtors(N
);
1339 for (unsigned i
= 0; i
!= N
; ++i
) {
1340 auto Ctor
= cast
<Constant
>(Init
->getOperand(i
));
1341 NewCtors
[i
] = ConstantStruct::get(EltTy
, Ctor
->getAggregateElement(0u),
1342 Ctor
->getAggregateElement(1),
1343 Constant::getNullValue(IRB
.getPtrTy()));
1345 Constant
*NewInit
= ConstantArray::get(ArrayType::get(EltTy
, N
), NewCtors
);
1347 return new GlobalVariable(NewInit
->getType(), false, GV
->getLinkage(),
1348 NewInit
, GV
->getName());
1351 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1352 // to byte shuffles.
1353 static Value
*UpgradeX86PSLLDQIntrinsics(IRBuilder
<> &Builder
,
1354 Value
*Op
, unsigned Shift
) {
1355 auto *ResultTy
= cast
<FixedVectorType
>(Op
->getType());
1356 unsigned NumElts
= ResultTy
->getNumElements() * 8;
1358 // Bitcast from a 64-bit element type to a byte element type.
1359 Type
*VecTy
= FixedVectorType::get(Builder
.getInt8Ty(), NumElts
);
1360 Op
= Builder
.CreateBitCast(Op
, VecTy
, "cast");
1362 // We'll be shuffling in zeroes.
1363 Value
*Res
= Constant::getNullValue(VecTy
);
1365 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1366 // we'll just return the zero vector.
1369 // 256/512-bit version is split into 2/4 16-byte lanes.
1370 for (unsigned l
= 0; l
!= NumElts
; l
+= 16)
1371 for (unsigned i
= 0; i
!= 16; ++i
) {
1372 unsigned Idx
= NumElts
+ i
- Shift
;
1374 Idx
-= NumElts
- 16; // end of lane, switch operand.
1375 Idxs
[l
+ i
] = Idx
+ l
;
1378 Res
= Builder
.CreateShuffleVector(Res
, Op
, ArrayRef(Idxs
, NumElts
));
1381 // Bitcast back to a 64-bit element type.
1382 return Builder
.CreateBitCast(Res
, ResultTy
, "cast");
1385 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1386 // to byte shuffles.
1387 static Value
*UpgradeX86PSRLDQIntrinsics(IRBuilder
<> &Builder
, Value
*Op
,
1389 auto *ResultTy
= cast
<FixedVectorType
>(Op
->getType());
1390 unsigned NumElts
= ResultTy
->getNumElements() * 8;
1392 // Bitcast from a 64-bit element type to a byte element type.
1393 Type
*VecTy
= FixedVectorType::get(Builder
.getInt8Ty(), NumElts
);
1394 Op
= Builder
.CreateBitCast(Op
, VecTy
, "cast");
1396 // We'll be shuffling in zeroes.
1397 Value
*Res
= Constant::getNullValue(VecTy
);
1399 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1400 // we'll just return the zero vector.
1403 // 256/512-bit version is split into 2/4 16-byte lanes.
1404 for (unsigned l
= 0; l
!= NumElts
; l
+= 16)
1405 for (unsigned i
= 0; i
!= 16; ++i
) {
1406 unsigned Idx
= i
+ Shift
;
1408 Idx
+= NumElts
- 16; // end of lane, switch operand.
1409 Idxs
[l
+ i
] = Idx
+ l
;
1412 Res
= Builder
.CreateShuffleVector(Op
, Res
, ArrayRef(Idxs
, NumElts
));
1415 // Bitcast back to a 64-bit element type.
1416 return Builder
.CreateBitCast(Res
, ResultTy
, "cast");
1419 static Value
*getX86MaskVec(IRBuilder
<> &Builder
, Value
*Mask
,
1421 assert(isPowerOf2_32(NumElts
) && "Expected power-of-2 mask elements");
1422 llvm::VectorType
*MaskTy
= FixedVectorType::get(
1423 Builder
.getInt1Ty(), cast
<IntegerType
>(Mask
->getType())->getBitWidth());
1424 Mask
= Builder
.CreateBitCast(Mask
, MaskTy
);
1426 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1427 // i8 and we need to extract down to the right number of elements.
1430 for (unsigned i
= 0; i
!= NumElts
; ++i
)
1432 Mask
= Builder
.CreateShuffleVector(Mask
, Mask
, ArrayRef(Indices
, NumElts
),
1439 static Value
*EmitX86Select(IRBuilder
<> &Builder
, Value
*Mask
,
1440 Value
*Op0
, Value
*Op1
) {
1441 // If the mask is all ones just emit the first operation.
1442 if (const auto *C
= dyn_cast
<Constant
>(Mask
))
1443 if (C
->isAllOnesValue())
1446 Mask
= getX86MaskVec(Builder
, Mask
,
1447 cast
<FixedVectorType
>(Op0
->getType())->getNumElements());
1448 return Builder
.CreateSelect(Mask
, Op0
, Op1
);
1451 static Value
*EmitX86ScalarSelect(IRBuilder
<> &Builder
, Value
*Mask
,
1452 Value
*Op0
, Value
*Op1
) {
1453 // If the mask is all ones just emit the first operation.
1454 if (const auto *C
= dyn_cast
<Constant
>(Mask
))
1455 if (C
->isAllOnesValue())
1458 auto *MaskTy
= FixedVectorType::get(Builder
.getInt1Ty(),
1459 Mask
->getType()->getIntegerBitWidth());
1460 Mask
= Builder
.CreateBitCast(Mask
, MaskTy
);
1461 Mask
= Builder
.CreateExtractElement(Mask
, (uint64_t)0);
1462 return Builder
.CreateSelect(Mask
, Op0
, Op1
);
1465 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1466 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1467 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1468 static Value
*UpgradeX86ALIGNIntrinsics(IRBuilder
<> &Builder
, Value
*Op0
,
1469 Value
*Op1
, Value
*Shift
,
1470 Value
*Passthru
, Value
*Mask
,
1472 unsigned ShiftVal
= cast
<llvm::ConstantInt
>(Shift
)->getZExtValue();
1474 unsigned NumElts
= cast
<FixedVectorType
>(Op0
->getType())->getNumElements();
1475 assert((IsVALIGN
|| NumElts
% 16 == 0) && "Illegal NumElts for PALIGNR!");
1476 assert((!IsVALIGN
|| NumElts
<= 16) && "NumElts too large for VALIGN!");
1477 assert(isPowerOf2_32(NumElts
) && "NumElts not a power of 2!");
1479 // Mask the immediate for VALIGN.
1481 ShiftVal
&= (NumElts
- 1);
1483 // If palignr is shifting the pair of vectors more than the size of two
1484 // lanes, emit zero.
1486 return llvm::Constant::getNullValue(Op0
->getType());
1488 // If palignr is shifting the pair of input vectors more than one lane,
1489 // but less than two lanes, convert to shifting in zeroes.
1490 if (ShiftVal
> 16) {
1493 Op0
= llvm::Constant::getNullValue(Op0
->getType());
1497 // 256-bit palignr operates on 128-bit lanes so we need to handle that
1498 for (unsigned l
= 0; l
< NumElts
; l
+= 16) {
1499 for (unsigned i
= 0; i
!= 16; ++i
) {
1500 unsigned Idx
= ShiftVal
+ i
;
1501 if (!IsVALIGN
&& Idx
>= 16) // Disable wrap for VALIGN.
1502 Idx
+= NumElts
- 16; // End of lane, switch operand.
1503 Indices
[l
+ i
] = Idx
+ l
;
1507 Value
*Align
= Builder
.CreateShuffleVector(
1508 Op1
, Op0
, ArrayRef(Indices
, NumElts
), "palignr");
1510 return EmitX86Select(Builder
, Mask
, Align
, Passthru
);
1513 static Value
*UpgradeX86VPERMT2Intrinsics(IRBuilder
<> &Builder
, CallBase
&CI
,
1514 bool ZeroMask
, bool IndexForm
) {
1515 Type
*Ty
= CI
.getType();
1516 unsigned VecWidth
= Ty
->getPrimitiveSizeInBits();
1517 unsigned EltWidth
= Ty
->getScalarSizeInBits();
1518 bool IsFloat
= Ty
->isFPOrFPVectorTy();
1520 if (VecWidth
== 128 && EltWidth
== 32 && IsFloat
)
1521 IID
= Intrinsic::x86_avx512_vpermi2var_ps_128
;
1522 else if (VecWidth
== 128 && EltWidth
== 32 && !IsFloat
)
1523 IID
= Intrinsic::x86_avx512_vpermi2var_d_128
;
1524 else if (VecWidth
== 128 && EltWidth
== 64 && IsFloat
)
1525 IID
= Intrinsic::x86_avx512_vpermi2var_pd_128
;
1526 else if (VecWidth
== 128 && EltWidth
== 64 && !IsFloat
)
1527 IID
= Intrinsic::x86_avx512_vpermi2var_q_128
;
1528 else if (VecWidth
== 256 && EltWidth
== 32 && IsFloat
)
1529 IID
= Intrinsic::x86_avx512_vpermi2var_ps_256
;
1530 else if (VecWidth
== 256 && EltWidth
== 32 && !IsFloat
)
1531 IID
= Intrinsic::x86_avx512_vpermi2var_d_256
;
1532 else if (VecWidth
== 256 && EltWidth
== 64 && IsFloat
)
1533 IID
= Intrinsic::x86_avx512_vpermi2var_pd_256
;
1534 else if (VecWidth
== 256 && EltWidth
== 64 && !IsFloat
)
1535 IID
= Intrinsic::x86_avx512_vpermi2var_q_256
;
1536 else if (VecWidth
== 512 && EltWidth
== 32 && IsFloat
)
1537 IID
= Intrinsic::x86_avx512_vpermi2var_ps_512
;
1538 else if (VecWidth
== 512 && EltWidth
== 32 && !IsFloat
)
1539 IID
= Intrinsic::x86_avx512_vpermi2var_d_512
;
1540 else if (VecWidth
== 512 && EltWidth
== 64 && IsFloat
)
1541 IID
= Intrinsic::x86_avx512_vpermi2var_pd_512
;
1542 else if (VecWidth
== 512 && EltWidth
== 64 && !IsFloat
)
1543 IID
= Intrinsic::x86_avx512_vpermi2var_q_512
;
1544 else if (VecWidth
== 128 && EltWidth
== 16)
1545 IID
= Intrinsic::x86_avx512_vpermi2var_hi_128
;
1546 else if (VecWidth
== 256 && EltWidth
== 16)
1547 IID
= Intrinsic::x86_avx512_vpermi2var_hi_256
;
1548 else if (VecWidth
== 512 && EltWidth
== 16)
1549 IID
= Intrinsic::x86_avx512_vpermi2var_hi_512
;
1550 else if (VecWidth
== 128 && EltWidth
== 8)
1551 IID
= Intrinsic::x86_avx512_vpermi2var_qi_128
;
1552 else if (VecWidth
== 256 && EltWidth
== 8)
1553 IID
= Intrinsic::x86_avx512_vpermi2var_qi_256
;
1554 else if (VecWidth
== 512 && EltWidth
== 8)
1555 IID
= Intrinsic::x86_avx512_vpermi2var_qi_512
;
1557 llvm_unreachable("Unexpected intrinsic");
1559 Value
*Args
[] = { CI
.getArgOperand(0) , CI
.getArgOperand(1),
1560 CI
.getArgOperand(2) };
1562 // If this isn't index form we need to swap operand 0 and 1.
1564 std::swap(Args
[0], Args
[1]);
1566 Value
*V
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
.getModule(), IID
),
1568 Value
*PassThru
= ZeroMask
? ConstantAggregateZero::get(Ty
)
1569 : Builder
.CreateBitCast(CI
.getArgOperand(1),
1571 return EmitX86Select(Builder
, CI
.getArgOperand(3), V
, PassThru
);
1574 static Value
*UpgradeX86BinaryIntrinsics(IRBuilder
<> &Builder
, CallBase
&CI
,
1575 Intrinsic::ID IID
) {
1576 Type
*Ty
= CI
.getType();
1577 Value
*Op0
= CI
.getOperand(0);
1578 Value
*Op1
= CI
.getOperand(1);
1579 Function
*Intrin
= Intrinsic::getDeclaration(CI
.getModule(), IID
, Ty
);
1580 Value
*Res
= Builder
.CreateCall(Intrin
, {Op0
, Op1
});
1582 if (CI
.arg_size() == 4) { // For masked intrinsics.
1583 Value
*VecSrc
= CI
.getOperand(2);
1584 Value
*Mask
= CI
.getOperand(3);
1585 Res
= EmitX86Select(Builder
, Mask
, Res
, VecSrc
);
1590 static Value
*upgradeX86Rotate(IRBuilder
<> &Builder
, CallBase
&CI
,
1591 bool IsRotateRight
) {
1592 Type
*Ty
= CI
.getType();
1593 Value
*Src
= CI
.getArgOperand(0);
1594 Value
*Amt
= CI
.getArgOperand(1);
1596 // Amount may be scalar immediate, in which case create a splat vector.
1597 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1598 // we only care about the lowest log2 bits anyway.
1599 if (Amt
->getType() != Ty
) {
1600 unsigned NumElts
= cast
<FixedVectorType
>(Ty
)->getNumElements();
1601 Amt
= Builder
.CreateIntCast(Amt
, Ty
->getScalarType(), false);
1602 Amt
= Builder
.CreateVectorSplat(NumElts
, Amt
);
1605 Intrinsic::ID IID
= IsRotateRight
? Intrinsic::fshr
: Intrinsic::fshl
;
1606 Function
*Intrin
= Intrinsic::getDeclaration(CI
.getModule(), IID
, Ty
);
1607 Value
*Res
= Builder
.CreateCall(Intrin
, {Src
, Src
, Amt
});
1609 if (CI
.arg_size() == 4) { // For masked intrinsics.
1610 Value
*VecSrc
= CI
.getOperand(2);
1611 Value
*Mask
= CI
.getOperand(3);
1612 Res
= EmitX86Select(Builder
, Mask
, Res
, VecSrc
);
1617 static Value
*upgradeX86vpcom(IRBuilder
<> &Builder
, CallBase
&CI
, unsigned Imm
,
1619 Type
*Ty
= CI
.getType();
1620 Value
*LHS
= CI
.getArgOperand(0);
1621 Value
*RHS
= CI
.getArgOperand(1);
1623 CmpInst::Predicate Pred
;
1626 Pred
= IsSigned
? ICmpInst::ICMP_SLT
: ICmpInst::ICMP_ULT
;
1629 Pred
= IsSigned
? ICmpInst::ICMP_SLE
: ICmpInst::ICMP_ULE
;
1632 Pred
= IsSigned
? ICmpInst::ICMP_SGT
: ICmpInst::ICMP_UGT
;
1635 Pred
= IsSigned
? ICmpInst::ICMP_SGE
: ICmpInst::ICMP_UGE
;
1638 Pred
= ICmpInst::ICMP_EQ
;
1641 Pred
= ICmpInst::ICMP_NE
;
1644 return Constant::getNullValue(Ty
); // FALSE
1646 return Constant::getAllOnesValue(Ty
); // TRUE
1648 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1651 Value
*Cmp
= Builder
.CreateICmp(Pred
, LHS
, RHS
);
1652 Value
*Ext
= Builder
.CreateSExt(Cmp
, Ty
);
1656 static Value
*upgradeX86ConcatShift(IRBuilder
<> &Builder
, CallBase
&CI
,
1657 bool IsShiftRight
, bool ZeroMask
) {
1658 Type
*Ty
= CI
.getType();
1659 Value
*Op0
= CI
.getArgOperand(0);
1660 Value
*Op1
= CI
.getArgOperand(1);
1661 Value
*Amt
= CI
.getArgOperand(2);
1664 std::swap(Op0
, Op1
);
1666 // Amount may be scalar immediate, in which case create a splat vector.
1667 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1668 // we only care about the lowest log2 bits anyway.
1669 if (Amt
->getType() != Ty
) {
1670 unsigned NumElts
= cast
<FixedVectorType
>(Ty
)->getNumElements();
1671 Amt
= Builder
.CreateIntCast(Amt
, Ty
->getScalarType(), false);
1672 Amt
= Builder
.CreateVectorSplat(NumElts
, Amt
);
1675 Intrinsic::ID IID
= IsShiftRight
? Intrinsic::fshr
: Intrinsic::fshl
;
1676 Function
*Intrin
= Intrinsic::getDeclaration(CI
.getModule(), IID
, Ty
);
1677 Value
*Res
= Builder
.CreateCall(Intrin
, {Op0
, Op1
, Amt
});
1679 unsigned NumArgs
= CI
.arg_size();
1680 if (NumArgs
>= 4) { // For masked intrinsics.
1681 Value
*VecSrc
= NumArgs
== 5 ? CI
.getArgOperand(3) :
1682 ZeroMask
? ConstantAggregateZero::get(CI
.getType()) :
1683 CI
.getArgOperand(0);
1684 Value
*Mask
= CI
.getOperand(NumArgs
- 1);
1685 Res
= EmitX86Select(Builder
, Mask
, Res
, VecSrc
);
1690 static Value
*UpgradeMaskedStore(IRBuilder
<> &Builder
,
1691 Value
*Ptr
, Value
*Data
, Value
*Mask
,
1693 // Cast the pointer to the right type.
1694 Ptr
= Builder
.CreateBitCast(Ptr
,
1695 llvm::PointerType::getUnqual(Data
->getType()));
1696 const Align Alignment
=
1698 ? Align(Data
->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
1701 // If the mask is all ones just emit a regular store.
1702 if (const auto *C
= dyn_cast
<Constant
>(Mask
))
1703 if (C
->isAllOnesValue())
1704 return Builder
.CreateAlignedStore(Data
, Ptr
, Alignment
);
1706 // Convert the mask from an integer type to a vector of i1.
1707 unsigned NumElts
= cast
<FixedVectorType
>(Data
->getType())->getNumElements();
1708 Mask
= getX86MaskVec(Builder
, Mask
, NumElts
);
1709 return Builder
.CreateMaskedStore(Data
, Ptr
, Alignment
, Mask
);
1712 static Value
*UpgradeMaskedLoad(IRBuilder
<> &Builder
,
1713 Value
*Ptr
, Value
*Passthru
, Value
*Mask
,
1715 Type
*ValTy
= Passthru
->getType();
1716 // Cast the pointer to the right type.
1717 Ptr
= Builder
.CreateBitCast(Ptr
, llvm::PointerType::getUnqual(ValTy
));
1718 const Align Alignment
=
1721 Passthru
->getType()->getPrimitiveSizeInBits().getFixedValue() /
1725 // If the mask is all ones just emit a regular store.
1726 if (const auto *C
= dyn_cast
<Constant
>(Mask
))
1727 if (C
->isAllOnesValue())
1728 return Builder
.CreateAlignedLoad(ValTy
, Ptr
, Alignment
);
1730 // Convert the mask from an integer type to a vector of i1.
1731 unsigned NumElts
= cast
<FixedVectorType
>(ValTy
)->getNumElements();
1732 Mask
= getX86MaskVec(Builder
, Mask
, NumElts
);
1733 return Builder
.CreateMaskedLoad(ValTy
, Ptr
, Alignment
, Mask
, Passthru
);
1736 static Value
*upgradeAbs(IRBuilder
<> &Builder
, CallBase
&CI
) {
1737 Type
*Ty
= CI
.getType();
1738 Value
*Op0
= CI
.getArgOperand(0);
1739 Function
*F
= Intrinsic::getDeclaration(CI
.getModule(), Intrinsic::abs
, Ty
);
1740 Value
*Res
= Builder
.CreateCall(F
, {Op0
, Builder
.getInt1(false)});
1741 if (CI
.arg_size() == 3)
1742 Res
= EmitX86Select(Builder
, CI
.getArgOperand(2), Res
, CI
.getArgOperand(1));
1746 static Value
*upgradePMULDQ(IRBuilder
<> &Builder
, CallBase
&CI
, bool IsSigned
) {
1747 Type
*Ty
= CI
.getType();
1749 // Arguments have a vXi32 type so cast to vXi64.
1750 Value
*LHS
= Builder
.CreateBitCast(CI
.getArgOperand(0), Ty
);
1751 Value
*RHS
= Builder
.CreateBitCast(CI
.getArgOperand(1), Ty
);
1754 // Shift left then arithmetic shift right.
1755 Constant
*ShiftAmt
= ConstantInt::get(Ty
, 32);
1756 LHS
= Builder
.CreateShl(LHS
, ShiftAmt
);
1757 LHS
= Builder
.CreateAShr(LHS
, ShiftAmt
);
1758 RHS
= Builder
.CreateShl(RHS
, ShiftAmt
);
1759 RHS
= Builder
.CreateAShr(RHS
, ShiftAmt
);
1761 // Clear the upper bits.
1762 Constant
*Mask
= ConstantInt::get(Ty
, 0xffffffff);
1763 LHS
= Builder
.CreateAnd(LHS
, Mask
);
1764 RHS
= Builder
.CreateAnd(RHS
, Mask
);
1767 Value
*Res
= Builder
.CreateMul(LHS
, RHS
);
1769 if (CI
.arg_size() == 4)
1770 Res
= EmitX86Select(Builder
, CI
.getArgOperand(3), Res
, CI
.getArgOperand(2));
1775 // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
1776 static Value
*ApplyX86MaskOn1BitsVec(IRBuilder
<> &Builder
, Value
*Vec
,
1778 unsigned NumElts
= cast
<FixedVectorType
>(Vec
->getType())->getNumElements();
1780 const auto *C
= dyn_cast
<Constant
>(Mask
);
1781 if (!C
|| !C
->isAllOnesValue())
1782 Vec
= Builder
.CreateAnd(Vec
, getX86MaskVec(Builder
, Mask
, NumElts
));
1787 for (unsigned i
= 0; i
!= NumElts
; ++i
)
1789 for (unsigned i
= NumElts
; i
!= 8; ++i
)
1790 Indices
[i
] = NumElts
+ i
% NumElts
;
1791 Vec
= Builder
.CreateShuffleVector(Vec
,
1792 Constant::getNullValue(Vec
->getType()),
1795 return Builder
.CreateBitCast(Vec
, Builder
.getIntNTy(std::max(NumElts
, 8U)));
1798 static Value
*upgradeMaskedCompare(IRBuilder
<> &Builder
, CallBase
&CI
,
1799 unsigned CC
, bool Signed
) {
1800 Value
*Op0
= CI
.getArgOperand(0);
1801 unsigned NumElts
= cast
<FixedVectorType
>(Op0
->getType())->getNumElements();
1805 Cmp
= Constant::getNullValue(
1806 FixedVectorType::get(Builder
.getInt1Ty(), NumElts
));
1807 } else if (CC
== 7) {
1808 Cmp
= Constant::getAllOnesValue(
1809 FixedVectorType::get(Builder
.getInt1Ty(), NumElts
));
1811 ICmpInst::Predicate Pred
;
1813 default: llvm_unreachable("Unknown condition code");
1814 case 0: Pred
= ICmpInst::ICMP_EQ
; break;
1815 case 1: Pred
= Signed
? ICmpInst::ICMP_SLT
: ICmpInst::ICMP_ULT
; break;
1816 case 2: Pred
= Signed
? ICmpInst::ICMP_SLE
: ICmpInst::ICMP_ULE
; break;
1817 case 4: Pred
= ICmpInst::ICMP_NE
; break;
1818 case 5: Pred
= Signed
? ICmpInst::ICMP_SGE
: ICmpInst::ICMP_UGE
; break;
1819 case 6: Pred
= Signed
? ICmpInst::ICMP_SGT
: ICmpInst::ICMP_UGT
; break;
1821 Cmp
= Builder
.CreateICmp(Pred
, Op0
, CI
.getArgOperand(1));
1824 Value
*Mask
= CI
.getArgOperand(CI
.arg_size() - 1);
1826 return ApplyX86MaskOn1BitsVec(Builder
, Cmp
, Mask
);
1829 // Replace a masked intrinsic with an older unmasked intrinsic.
1830 static Value
*UpgradeX86MaskedShift(IRBuilder
<> &Builder
, CallBase
&CI
,
1831 Intrinsic::ID IID
) {
1832 Function
*Intrin
= Intrinsic::getDeclaration(CI
.getModule(), IID
);
1833 Value
*Rep
= Builder
.CreateCall(Intrin
,
1834 { CI
.getArgOperand(0), CI
.getArgOperand(1) });
1835 return EmitX86Select(Builder
, CI
.getArgOperand(3), Rep
, CI
.getArgOperand(2));
1838 static Value
* upgradeMaskedMove(IRBuilder
<> &Builder
, CallBase
&CI
) {
1839 Value
* A
= CI
.getArgOperand(0);
1840 Value
* B
= CI
.getArgOperand(1);
1841 Value
* Src
= CI
.getArgOperand(2);
1842 Value
* Mask
= CI
.getArgOperand(3);
1844 Value
* AndNode
= Builder
.CreateAnd(Mask
, APInt(8, 1));
1845 Value
* Cmp
= Builder
.CreateIsNotNull(AndNode
);
1846 Value
* Extract1
= Builder
.CreateExtractElement(B
, (uint64_t)0);
1847 Value
* Extract2
= Builder
.CreateExtractElement(Src
, (uint64_t)0);
1848 Value
* Select
= Builder
.CreateSelect(Cmp
, Extract1
, Extract2
);
1849 return Builder
.CreateInsertElement(A
, Select
, (uint64_t)0);
1853 static Value
* UpgradeMaskToInt(IRBuilder
<> &Builder
, CallBase
&CI
) {
1854 Value
* Op
= CI
.getArgOperand(0);
1855 Type
* ReturnOp
= CI
.getType();
1856 unsigned NumElts
= cast
<FixedVectorType
>(CI
.getType())->getNumElements();
1857 Value
*Mask
= getX86MaskVec(Builder
, Op
, NumElts
);
1858 return Builder
.CreateSExt(Mask
, ReturnOp
, "vpmovm2");
1861 // Replace intrinsic with unmasked version and a select.
1862 static bool upgradeAVX512MaskToSelect(StringRef Name
, IRBuilder
<> &Builder
,
1863 CallBase
&CI
, Value
*&Rep
) {
1864 Name
= Name
.substr(12); // Remove avx512.mask.
1866 unsigned VecWidth
= CI
.getType()->getPrimitiveSizeInBits();
1867 unsigned EltWidth
= CI
.getType()->getScalarSizeInBits();
1869 if (Name
.startswith("max.p")) {
1870 if (VecWidth
== 128 && EltWidth
== 32)
1871 IID
= Intrinsic::x86_sse_max_ps
;
1872 else if (VecWidth
== 128 && EltWidth
== 64)
1873 IID
= Intrinsic::x86_sse2_max_pd
;
1874 else if (VecWidth
== 256 && EltWidth
== 32)
1875 IID
= Intrinsic::x86_avx_max_ps_256
;
1876 else if (VecWidth
== 256 && EltWidth
== 64)
1877 IID
= Intrinsic::x86_avx_max_pd_256
;
1879 llvm_unreachable("Unexpected intrinsic");
1880 } else if (Name
.startswith("min.p")) {
1881 if (VecWidth
== 128 && EltWidth
== 32)
1882 IID
= Intrinsic::x86_sse_min_ps
;
1883 else if (VecWidth
== 128 && EltWidth
== 64)
1884 IID
= Intrinsic::x86_sse2_min_pd
;
1885 else if (VecWidth
== 256 && EltWidth
== 32)
1886 IID
= Intrinsic::x86_avx_min_ps_256
;
1887 else if (VecWidth
== 256 && EltWidth
== 64)
1888 IID
= Intrinsic::x86_avx_min_pd_256
;
1890 llvm_unreachable("Unexpected intrinsic");
1891 } else if (Name
.startswith("pshuf.b.")) {
1892 if (VecWidth
== 128)
1893 IID
= Intrinsic::x86_ssse3_pshuf_b_128
;
1894 else if (VecWidth
== 256)
1895 IID
= Intrinsic::x86_avx2_pshuf_b
;
1896 else if (VecWidth
== 512)
1897 IID
= Intrinsic::x86_avx512_pshuf_b_512
;
1899 llvm_unreachable("Unexpected intrinsic");
1900 } else if (Name
.startswith("pmul.hr.sw.")) {
1901 if (VecWidth
== 128)
1902 IID
= Intrinsic::x86_ssse3_pmul_hr_sw_128
;
1903 else if (VecWidth
== 256)
1904 IID
= Intrinsic::x86_avx2_pmul_hr_sw
;
1905 else if (VecWidth
== 512)
1906 IID
= Intrinsic::x86_avx512_pmul_hr_sw_512
;
1908 llvm_unreachable("Unexpected intrinsic");
1909 } else if (Name
.startswith("pmulh.w.")) {
1910 if (VecWidth
== 128)
1911 IID
= Intrinsic::x86_sse2_pmulh_w
;
1912 else if (VecWidth
== 256)
1913 IID
= Intrinsic::x86_avx2_pmulh_w
;
1914 else if (VecWidth
== 512)
1915 IID
= Intrinsic::x86_avx512_pmulh_w_512
;
1917 llvm_unreachable("Unexpected intrinsic");
1918 } else if (Name
.startswith("pmulhu.w.")) {
1919 if (VecWidth
== 128)
1920 IID
= Intrinsic::x86_sse2_pmulhu_w
;
1921 else if (VecWidth
== 256)
1922 IID
= Intrinsic::x86_avx2_pmulhu_w
;
1923 else if (VecWidth
== 512)
1924 IID
= Intrinsic::x86_avx512_pmulhu_w_512
;
1926 llvm_unreachable("Unexpected intrinsic");
1927 } else if (Name
.startswith("pmaddw.d.")) {
1928 if (VecWidth
== 128)
1929 IID
= Intrinsic::x86_sse2_pmadd_wd
;
1930 else if (VecWidth
== 256)
1931 IID
= Intrinsic::x86_avx2_pmadd_wd
;
1932 else if (VecWidth
== 512)
1933 IID
= Intrinsic::x86_avx512_pmaddw_d_512
;
1935 llvm_unreachable("Unexpected intrinsic");
1936 } else if (Name
.startswith("pmaddubs.w.")) {
1937 if (VecWidth
== 128)
1938 IID
= Intrinsic::x86_ssse3_pmadd_ub_sw_128
;
1939 else if (VecWidth
== 256)
1940 IID
= Intrinsic::x86_avx2_pmadd_ub_sw
;
1941 else if (VecWidth
== 512)
1942 IID
= Intrinsic::x86_avx512_pmaddubs_w_512
;
1944 llvm_unreachable("Unexpected intrinsic");
1945 } else if (Name
.startswith("packsswb.")) {
1946 if (VecWidth
== 128)
1947 IID
= Intrinsic::x86_sse2_packsswb_128
;
1948 else if (VecWidth
== 256)
1949 IID
= Intrinsic::x86_avx2_packsswb
;
1950 else if (VecWidth
== 512)
1951 IID
= Intrinsic::x86_avx512_packsswb_512
;
1953 llvm_unreachable("Unexpected intrinsic");
1954 } else if (Name
.startswith("packssdw.")) {
1955 if (VecWidth
== 128)
1956 IID
= Intrinsic::x86_sse2_packssdw_128
;
1957 else if (VecWidth
== 256)
1958 IID
= Intrinsic::x86_avx2_packssdw
;
1959 else if (VecWidth
== 512)
1960 IID
= Intrinsic::x86_avx512_packssdw_512
;
1962 llvm_unreachable("Unexpected intrinsic");
1963 } else if (Name
.startswith("packuswb.")) {
1964 if (VecWidth
== 128)
1965 IID
= Intrinsic::x86_sse2_packuswb_128
;
1966 else if (VecWidth
== 256)
1967 IID
= Intrinsic::x86_avx2_packuswb
;
1968 else if (VecWidth
== 512)
1969 IID
= Intrinsic::x86_avx512_packuswb_512
;
1971 llvm_unreachable("Unexpected intrinsic");
1972 } else if (Name
.startswith("packusdw.")) {
1973 if (VecWidth
== 128)
1974 IID
= Intrinsic::x86_sse41_packusdw
;
1975 else if (VecWidth
== 256)
1976 IID
= Intrinsic::x86_avx2_packusdw
;
1977 else if (VecWidth
== 512)
1978 IID
= Intrinsic::x86_avx512_packusdw_512
;
1980 llvm_unreachable("Unexpected intrinsic");
1981 } else if (Name
.startswith("vpermilvar.")) {
1982 if (VecWidth
== 128 && EltWidth
== 32)
1983 IID
= Intrinsic::x86_avx_vpermilvar_ps
;
1984 else if (VecWidth
== 128 && EltWidth
== 64)
1985 IID
= Intrinsic::x86_avx_vpermilvar_pd
;
1986 else if (VecWidth
== 256 && EltWidth
== 32)
1987 IID
= Intrinsic::x86_avx_vpermilvar_ps_256
;
1988 else if (VecWidth
== 256 && EltWidth
== 64)
1989 IID
= Intrinsic::x86_avx_vpermilvar_pd_256
;
1990 else if (VecWidth
== 512 && EltWidth
== 32)
1991 IID
= Intrinsic::x86_avx512_vpermilvar_ps_512
;
1992 else if (VecWidth
== 512 && EltWidth
== 64)
1993 IID
= Intrinsic::x86_avx512_vpermilvar_pd_512
;
1995 llvm_unreachable("Unexpected intrinsic");
1996 } else if (Name
== "cvtpd2dq.256") {
1997 IID
= Intrinsic::x86_avx_cvt_pd2dq_256
;
1998 } else if (Name
== "cvtpd2ps.256") {
1999 IID
= Intrinsic::x86_avx_cvt_pd2_ps_256
;
2000 } else if (Name
== "cvttpd2dq.256") {
2001 IID
= Intrinsic::x86_avx_cvtt_pd2dq_256
;
2002 } else if (Name
== "cvttps2dq.128") {
2003 IID
= Intrinsic::x86_sse2_cvttps2dq
;
2004 } else if (Name
== "cvttps2dq.256") {
2005 IID
= Intrinsic::x86_avx_cvtt_ps2dq_256
;
2006 } else if (Name
.startswith("permvar.")) {
2007 bool IsFloat
= CI
.getType()->isFPOrFPVectorTy();
2008 if (VecWidth
== 256 && EltWidth
== 32 && IsFloat
)
2009 IID
= Intrinsic::x86_avx2_permps
;
2010 else if (VecWidth
== 256 && EltWidth
== 32 && !IsFloat
)
2011 IID
= Intrinsic::x86_avx2_permd
;
2012 else if (VecWidth
== 256 && EltWidth
== 64 && IsFloat
)
2013 IID
= Intrinsic::x86_avx512_permvar_df_256
;
2014 else if (VecWidth
== 256 && EltWidth
== 64 && !IsFloat
)
2015 IID
= Intrinsic::x86_avx512_permvar_di_256
;
2016 else if (VecWidth
== 512 && EltWidth
== 32 && IsFloat
)
2017 IID
= Intrinsic::x86_avx512_permvar_sf_512
;
2018 else if (VecWidth
== 512 && EltWidth
== 32 && !IsFloat
)
2019 IID
= Intrinsic::x86_avx512_permvar_si_512
;
2020 else if (VecWidth
== 512 && EltWidth
== 64 && IsFloat
)
2021 IID
= Intrinsic::x86_avx512_permvar_df_512
;
2022 else if (VecWidth
== 512 && EltWidth
== 64 && !IsFloat
)
2023 IID
= Intrinsic::x86_avx512_permvar_di_512
;
2024 else if (VecWidth
== 128 && EltWidth
== 16)
2025 IID
= Intrinsic::x86_avx512_permvar_hi_128
;
2026 else if (VecWidth
== 256 && EltWidth
== 16)
2027 IID
= Intrinsic::x86_avx512_permvar_hi_256
;
2028 else if (VecWidth
== 512 && EltWidth
== 16)
2029 IID
= Intrinsic::x86_avx512_permvar_hi_512
;
2030 else if (VecWidth
== 128 && EltWidth
== 8)
2031 IID
= Intrinsic::x86_avx512_permvar_qi_128
;
2032 else if (VecWidth
== 256 && EltWidth
== 8)
2033 IID
= Intrinsic::x86_avx512_permvar_qi_256
;
2034 else if (VecWidth
== 512 && EltWidth
== 8)
2035 IID
= Intrinsic::x86_avx512_permvar_qi_512
;
2037 llvm_unreachable("Unexpected intrinsic");
2038 } else if (Name
.startswith("dbpsadbw.")) {
2039 if (VecWidth
== 128)
2040 IID
= Intrinsic::x86_avx512_dbpsadbw_128
;
2041 else if (VecWidth
== 256)
2042 IID
= Intrinsic::x86_avx512_dbpsadbw_256
;
2043 else if (VecWidth
== 512)
2044 IID
= Intrinsic::x86_avx512_dbpsadbw_512
;
2046 llvm_unreachable("Unexpected intrinsic");
2047 } else if (Name
.startswith("pmultishift.qb.")) {
2048 if (VecWidth
== 128)
2049 IID
= Intrinsic::x86_avx512_pmultishift_qb_128
;
2050 else if (VecWidth
== 256)
2051 IID
= Intrinsic::x86_avx512_pmultishift_qb_256
;
2052 else if (VecWidth
== 512)
2053 IID
= Intrinsic::x86_avx512_pmultishift_qb_512
;
2055 llvm_unreachable("Unexpected intrinsic");
2056 } else if (Name
.startswith("conflict.")) {
2057 if (Name
[9] == 'd' && VecWidth
== 128)
2058 IID
= Intrinsic::x86_avx512_conflict_d_128
;
2059 else if (Name
[9] == 'd' && VecWidth
== 256)
2060 IID
= Intrinsic::x86_avx512_conflict_d_256
;
2061 else if (Name
[9] == 'd' && VecWidth
== 512)
2062 IID
= Intrinsic::x86_avx512_conflict_d_512
;
2063 else if (Name
[9] == 'q' && VecWidth
== 128)
2064 IID
= Intrinsic::x86_avx512_conflict_q_128
;
2065 else if (Name
[9] == 'q' && VecWidth
== 256)
2066 IID
= Intrinsic::x86_avx512_conflict_q_256
;
2067 else if (Name
[9] == 'q' && VecWidth
== 512)
2068 IID
= Intrinsic::x86_avx512_conflict_q_512
;
2070 llvm_unreachable("Unexpected intrinsic");
2071 } else if (Name
.startswith("pavg.")) {
2072 if (Name
[5] == 'b' && VecWidth
== 128)
2073 IID
= Intrinsic::x86_sse2_pavg_b
;
2074 else if (Name
[5] == 'b' && VecWidth
== 256)
2075 IID
= Intrinsic::x86_avx2_pavg_b
;
2076 else if (Name
[5] == 'b' && VecWidth
== 512)
2077 IID
= Intrinsic::x86_avx512_pavg_b_512
;
2078 else if (Name
[5] == 'w' && VecWidth
== 128)
2079 IID
= Intrinsic::x86_sse2_pavg_w
;
2080 else if (Name
[5] == 'w' && VecWidth
== 256)
2081 IID
= Intrinsic::x86_avx2_pavg_w
;
2082 else if (Name
[5] == 'w' && VecWidth
== 512)
2083 IID
= Intrinsic::x86_avx512_pavg_w_512
;
2085 llvm_unreachable("Unexpected intrinsic");
2089 SmallVector
<Value
*, 4> Args(CI
.args());
2092 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
.getModule(), IID
),
2094 unsigned NumArgs
= CI
.arg_size();
2095 Rep
= EmitX86Select(Builder
, CI
.getArgOperand(NumArgs
- 1), Rep
,
2096 CI
.getArgOperand(NumArgs
- 2));
2100 /// Upgrade comment in call to inline asm that represents an objc retain release
2102 void llvm::UpgradeInlineAsmString(std::string
*AsmStr
) {
2104 if (AsmStr
->find("mov\tfp") == 0 &&
2105 AsmStr
->find("objc_retainAutoreleaseReturnValue") != std::string::npos
&&
2106 (Pos
= AsmStr
->find("# marker")) != std::string::npos
) {
2107 AsmStr
->replace(Pos
, 1, ";");
2111 static Value
*UpgradeARMIntrinsicCall(StringRef Name
, CallBase
*CI
, Function
*F
,
2112 IRBuilder
<> &Builder
) {
2113 if (Name
== "mve.vctp64.old") {
2114 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
2116 Value
*VCTP
= Builder
.CreateCall(
2117 Intrinsic::getDeclaration(F
->getParent(), Intrinsic::arm_mve_vctp64
),
2118 CI
->getArgOperand(0), CI
->getName());
2119 Value
*C1
= Builder
.CreateCall(
2120 Intrinsic::getDeclaration(
2121 F
->getParent(), Intrinsic::arm_mve_pred_v2i
,
2122 {VectorType::get(Builder
.getInt1Ty(), 2, false)}),
2124 return Builder
.CreateCall(
2125 Intrinsic::getDeclaration(
2126 F
->getParent(), Intrinsic::arm_mve_pred_i2v
,
2127 {VectorType::get(Builder
.getInt1Ty(), 4, false)}),
2129 } else if (Name
== "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
2130 Name
== "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
2131 Name
== "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
2132 Name
== "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
2134 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
2135 Name
== "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
2136 Name
== "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
2137 Name
== "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
2139 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
2140 Name
== "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
2141 Name
== "cde.vcx1q.predicated.v2i64.v4i1" ||
2142 Name
== "cde.vcx1qa.predicated.v2i64.v4i1" ||
2143 Name
== "cde.vcx2q.predicated.v2i64.v4i1" ||
2144 Name
== "cde.vcx2qa.predicated.v2i64.v4i1" ||
2145 Name
== "cde.vcx3q.predicated.v2i64.v4i1" ||
2146 Name
== "cde.vcx3qa.predicated.v2i64.v4i1") {
2147 std::vector
<Type
*> Tys
;
2148 unsigned ID
= CI
->getIntrinsicID();
2149 Type
*V2I1Ty
= FixedVectorType::get(Builder
.getInt1Ty(), 2);
2151 case Intrinsic::arm_mve_mull_int_predicated
:
2152 case Intrinsic::arm_mve_vqdmull_predicated
:
2153 case Intrinsic::arm_mve_vldr_gather_base_predicated
:
2154 Tys
= {CI
->getType(), CI
->getOperand(0)->getType(), V2I1Ty
};
2156 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated
:
2157 case Intrinsic::arm_mve_vstr_scatter_base_predicated
:
2158 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated
:
2159 Tys
= {CI
->getOperand(0)->getType(), CI
->getOperand(0)->getType(),
2162 case Intrinsic::arm_mve_vldr_gather_offset_predicated
:
2163 Tys
= {CI
->getType(), CI
->getOperand(0)->getType(),
2164 CI
->getOperand(1)->getType(), V2I1Ty
};
2166 case Intrinsic::arm_mve_vstr_scatter_offset_predicated
:
2167 Tys
= {CI
->getOperand(0)->getType(), CI
->getOperand(1)->getType(),
2168 CI
->getOperand(2)->getType(), V2I1Ty
};
2170 case Intrinsic::arm_cde_vcx1q_predicated
:
2171 case Intrinsic::arm_cde_vcx1qa_predicated
:
2172 case Intrinsic::arm_cde_vcx2q_predicated
:
2173 case Intrinsic::arm_cde_vcx2qa_predicated
:
2174 case Intrinsic::arm_cde_vcx3q_predicated
:
2175 case Intrinsic::arm_cde_vcx3qa_predicated
:
2176 Tys
= {CI
->getOperand(1)->getType(), V2I1Ty
};
2179 llvm_unreachable("Unhandled Intrinsic!");
2182 std::vector
<Value
*> Ops
;
2183 for (Value
*Op
: CI
->args()) {
2184 Type
*Ty
= Op
->getType();
2185 if (Ty
->getScalarSizeInBits() == 1) {
2186 Value
*C1
= Builder
.CreateCall(
2187 Intrinsic::getDeclaration(
2188 F
->getParent(), Intrinsic::arm_mve_pred_v2i
,
2189 {VectorType::get(Builder
.getInt1Ty(), 4, false)}),
2191 Op
= Builder
.CreateCall(
2192 Intrinsic::getDeclaration(F
->getParent(),
2193 Intrinsic::arm_mve_pred_i2v
, {V2I1Ty
}),
2199 Function
*Fn
= Intrinsic::getDeclaration(F
->getParent(), ID
, Tys
);
2200 return Builder
.CreateCall(Fn
, Ops
, CI
->getName());
2202 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
2205 static Value
*UpgradeAMDGCNIntrinsicCall(StringRef Name
, CallBase
*CI
,
2206 Function
*F
, IRBuilder
<> &Builder
) {
2207 const bool IsInc
= Name
.startswith("atomic.inc.");
2208 if (IsInc
|| Name
.startswith("atomic.dec.")) {
2209 if (CI
->getNumOperands() != 6) // Malformed bitcode.
2212 AtomicRMWInst::BinOp RMWOp
=
2213 IsInc
? AtomicRMWInst::UIncWrap
: AtomicRMWInst::UDecWrap
;
2215 Value
*Ptr
= CI
->getArgOperand(0);
2216 Value
*Val
= CI
->getArgOperand(1);
2217 ConstantInt
*OrderArg
= dyn_cast
<ConstantInt
>(CI
->getArgOperand(2));
2218 ConstantInt
*VolatileArg
= dyn_cast
<ConstantInt
>(CI
->getArgOperand(4));
2220 AtomicOrdering Order
= AtomicOrdering::SequentiallyConsistent
;
2221 if (OrderArg
&& isValidAtomicOrdering(OrderArg
->getZExtValue()))
2222 Order
= static_cast<AtomicOrdering
>(OrderArg
->getZExtValue());
2223 if (Order
== AtomicOrdering::NotAtomic
||
2224 Order
== AtomicOrdering::Unordered
)
2225 Order
= AtomicOrdering::SequentiallyConsistent
;
2227 // The scope argument never really worked correctly. Use agent as the most
2228 // conservative option which should still always produce the instruction.
2229 SyncScope::ID SSID
= F
->getContext().getOrInsertSyncScopeID("agent");
2230 AtomicRMWInst
*RMW
=
2231 Builder
.CreateAtomicRMW(RMWOp
, Ptr
, Val
, std::nullopt
, Order
, SSID
);
2233 if (!VolatileArg
|| !VolatileArg
->isZero())
2234 RMW
->setVolatile(true);
2238 llvm_unreachable("Unknown function for AMDGPU intrinsic upgrade.");
2241 /// Upgrade a call to an old intrinsic. All argument and return casting must be
2242 /// provided to seamlessly integrate with existing context.
2243 void llvm::UpgradeIntrinsicCall(CallBase
*CI
, Function
*NewFn
) {
2244 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
2245 // checks the callee's function type matches. It's likely we need to handle
2246 // type changes here.
2247 Function
*F
= dyn_cast
<Function
>(CI
->getCalledOperand());
2251 LLVMContext
&C
= CI
->getContext();
2252 IRBuilder
<> Builder(C
);
2253 Builder
.SetInsertPoint(CI
->getParent(), CI
->getIterator());
2256 // Get the Function's name.
2257 StringRef Name
= F
->getName();
2259 assert(Name
.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
2260 Name
= Name
.substr(5);
2262 bool IsX86
= Name
.startswith("x86.");
2264 Name
= Name
.substr(4);
2265 bool IsNVVM
= Name
.startswith("nvvm.");
2267 Name
= Name
.substr(5);
2268 bool IsARM
= Name
.startswith("arm.");
2270 Name
= Name
.substr(4);
2271 bool IsAMDGCN
= Name
.startswith("amdgcn.");
2273 Name
= Name
.substr(7);
2275 if (IsX86
&& Name
.startswith("sse4a.movnt.")) {
2276 SmallVector
<Metadata
*, 1> Elts
;
2278 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C
), 1)));
2279 MDNode
*Node
= MDNode::get(C
, Elts
);
2281 Value
*Arg0
= CI
->getArgOperand(0);
2282 Value
*Arg1
= CI
->getArgOperand(1);
2284 // Nontemporal (unaligned) store of the 0'th element of the float/double
2286 Type
*SrcEltTy
= cast
<VectorType
>(Arg1
->getType())->getElementType();
2287 PointerType
*EltPtrTy
= PointerType::getUnqual(SrcEltTy
);
2288 Value
*Addr
= Builder
.CreateBitCast(Arg0
, EltPtrTy
, "cast");
2290 Builder
.CreateExtractElement(Arg1
, (uint64_t)0, "extractelement");
2292 StoreInst
*SI
= Builder
.CreateAlignedStore(Extract
, Addr
, Align(1));
2293 SI
->setMetadata(LLVMContext::MD_nontemporal
, Node
);
2295 // Remove intrinsic.
2296 CI
->eraseFromParent();
2300 if (IsX86
&& (Name
.startswith("avx.movnt.") ||
2301 Name
.startswith("avx512.storent."))) {
2302 SmallVector
<Metadata
*, 1> Elts
;
2304 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C
), 1)));
2305 MDNode
*Node
= MDNode::get(C
, Elts
);
2307 Value
*Arg0
= CI
->getArgOperand(0);
2308 Value
*Arg1
= CI
->getArgOperand(1);
2310 // Convert the type of the pointer to a pointer to the stored type.
2311 Value
*BC
= Builder
.CreateBitCast(Arg0
,
2312 PointerType::getUnqual(Arg1
->getType()),
2314 StoreInst
*SI
= Builder
.CreateAlignedStore(
2316 Align(Arg1
->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2317 SI
->setMetadata(LLVMContext::MD_nontemporal
, Node
);
2319 // Remove intrinsic.
2320 CI
->eraseFromParent();
2324 if (IsX86
&& Name
== "sse2.storel.dq") {
2325 Value
*Arg0
= CI
->getArgOperand(0);
2326 Value
*Arg1
= CI
->getArgOperand(1);
2328 auto *NewVecTy
= FixedVectorType::get(Type::getInt64Ty(C
), 2);
2329 Value
*BC0
= Builder
.CreateBitCast(Arg1
, NewVecTy
, "cast");
2330 Value
*Elt
= Builder
.CreateExtractElement(BC0
, (uint64_t)0);
2331 Value
*BC
= Builder
.CreateBitCast(Arg0
,
2332 PointerType::getUnqual(Elt
->getType()),
2334 Builder
.CreateAlignedStore(Elt
, BC
, Align(1));
2336 // Remove intrinsic.
2337 CI
->eraseFromParent();
2341 if (IsX86
&& (Name
.startswith("sse.storeu.") ||
2342 Name
.startswith("sse2.storeu.") ||
2343 Name
.startswith("avx.storeu."))) {
2344 Value
*Arg0
= CI
->getArgOperand(0);
2345 Value
*Arg1
= CI
->getArgOperand(1);
2347 Arg0
= Builder
.CreateBitCast(Arg0
,
2348 PointerType::getUnqual(Arg1
->getType()),
2350 Builder
.CreateAlignedStore(Arg1
, Arg0
, Align(1));
2352 // Remove intrinsic.
2353 CI
->eraseFromParent();
2357 if (IsX86
&& Name
== "avx512.mask.store.ss") {
2358 Value
*Mask
= Builder
.CreateAnd(CI
->getArgOperand(2), Builder
.getInt8(1));
2359 UpgradeMaskedStore(Builder
, CI
->getArgOperand(0), CI
->getArgOperand(1),
2362 // Remove intrinsic.
2363 CI
->eraseFromParent();
2367 if (IsX86
&& (Name
.startswith("avx512.mask.store"))) {
2368 // "avx512.mask.storeu." or "avx512.mask.store."
2369 bool Aligned
= Name
[17] != 'u'; // "avx512.mask.storeu".
2370 UpgradeMaskedStore(Builder
, CI
->getArgOperand(0), CI
->getArgOperand(1),
2371 CI
->getArgOperand(2), Aligned
);
2373 // Remove intrinsic.
2374 CI
->eraseFromParent();
2379 // Upgrade packed integer vector compare intrinsics to compare instructions.
2380 if (IsX86
&& (Name
.startswith("sse2.pcmp") ||
2381 Name
.startswith("avx2.pcmp"))) {
2382 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2383 bool CmpEq
= Name
[9] == 'e';
2384 Rep
= Builder
.CreateICmp(CmpEq
? ICmpInst::ICMP_EQ
: ICmpInst::ICMP_SGT
,
2385 CI
->getArgOperand(0), CI
->getArgOperand(1));
2386 Rep
= Builder
.CreateSExt(Rep
, CI
->getType(), "");
2387 } else if (IsX86
&& (Name
.startswith("avx512.broadcastm"))) {
2388 Type
*ExtTy
= Type::getInt32Ty(C
);
2389 if (CI
->getOperand(0)->getType()->isIntegerTy(8))
2390 ExtTy
= Type::getInt64Ty(C
);
2391 unsigned NumElts
= CI
->getType()->getPrimitiveSizeInBits() /
2392 ExtTy
->getPrimitiveSizeInBits();
2393 Rep
= Builder
.CreateZExt(CI
->getArgOperand(0), ExtTy
);
2394 Rep
= Builder
.CreateVectorSplat(NumElts
, Rep
);
2395 } else if (IsX86
&& (Name
== "sse.sqrt.ss" ||
2396 Name
== "sse2.sqrt.sd")) {
2397 Value
*Vec
= CI
->getArgOperand(0);
2398 Value
*Elt0
= Builder
.CreateExtractElement(Vec
, (uint64_t)0);
2399 Function
*Intr
= Intrinsic::getDeclaration(F
->getParent(),
2400 Intrinsic::sqrt
, Elt0
->getType());
2401 Elt0
= Builder
.CreateCall(Intr
, Elt0
);
2402 Rep
= Builder
.CreateInsertElement(Vec
, Elt0
, (uint64_t)0);
2403 } else if (IsX86
&& (Name
.startswith("avx.sqrt.p") ||
2404 Name
.startswith("sse2.sqrt.p") ||
2405 Name
.startswith("sse.sqrt.p"))) {
2406 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(),
2409 {CI
->getArgOperand(0)});
2410 } else if (IsX86
&& (Name
.startswith("avx512.mask.sqrt.p"))) {
2411 if (CI
->arg_size() == 4 &&
2412 (!isa
<ConstantInt
>(CI
->getArgOperand(3)) ||
2413 cast
<ConstantInt
>(CI
->getArgOperand(3))->getZExtValue() != 4)) {
2414 Intrinsic::ID IID
= Name
[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2415 : Intrinsic::x86_avx512_sqrt_pd_512
;
2417 Value
*Args
[] = { CI
->getArgOperand(0), CI
->getArgOperand(3) };
2418 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(),
2421 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(),
2424 {CI
->getArgOperand(0)});
2426 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(2), Rep
,
2427 CI
->getArgOperand(1));
2428 } else if (IsX86
&& (Name
.startswith("avx512.ptestm") ||
2429 Name
.startswith("avx512.ptestnm"))) {
2430 Value
*Op0
= CI
->getArgOperand(0);
2431 Value
*Op1
= CI
->getArgOperand(1);
2432 Value
*Mask
= CI
->getArgOperand(2);
2433 Rep
= Builder
.CreateAnd(Op0
, Op1
);
2434 llvm::Type
*Ty
= Op0
->getType();
2435 Value
*Zero
= llvm::Constant::getNullValue(Ty
);
2436 ICmpInst::Predicate Pred
=
2437 Name
.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE
: ICmpInst::ICMP_EQ
;
2438 Rep
= Builder
.CreateICmp(Pred
, Rep
, Zero
);
2439 Rep
= ApplyX86MaskOn1BitsVec(Builder
, Rep
, Mask
);
2440 } else if (IsX86
&& (Name
.startswith("avx512.mask.pbroadcast"))){
2441 unsigned NumElts
= cast
<FixedVectorType
>(CI
->getArgOperand(1)->getType())
2443 Rep
= Builder
.CreateVectorSplat(NumElts
, CI
->getArgOperand(0));
2444 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(2), Rep
,
2445 CI
->getArgOperand(1));
2446 } else if (IsX86
&& (Name
.startswith("avx512.kunpck"))) {
2447 unsigned NumElts
= CI
->getType()->getScalarSizeInBits();
2448 Value
*LHS
= getX86MaskVec(Builder
, CI
->getArgOperand(0), NumElts
);
2449 Value
*RHS
= getX86MaskVec(Builder
, CI
->getArgOperand(1), NumElts
);
2451 for (unsigned i
= 0; i
!= NumElts
; ++i
)
2454 // First extract half of each vector. This gives better codegen than
2455 // doing it in a single shuffle.
2457 Builder
.CreateShuffleVector(LHS
, LHS
, ArrayRef(Indices
, NumElts
/ 2));
2459 Builder
.CreateShuffleVector(RHS
, RHS
, ArrayRef(Indices
, NumElts
/ 2));
2460 // Concat the vectors.
2461 // NOTE: Operands have to be swapped to match intrinsic definition.
2462 Rep
= Builder
.CreateShuffleVector(RHS
, LHS
, ArrayRef(Indices
, NumElts
));
2463 Rep
= Builder
.CreateBitCast(Rep
, CI
->getType());
2464 } else if (IsX86
&& Name
== "avx512.kand.w") {
2465 Value
*LHS
= getX86MaskVec(Builder
, CI
->getArgOperand(0), 16);
2466 Value
*RHS
= getX86MaskVec(Builder
, CI
->getArgOperand(1), 16);
2467 Rep
= Builder
.CreateAnd(LHS
, RHS
);
2468 Rep
= Builder
.CreateBitCast(Rep
, CI
->getType());
2469 } else if (IsX86
&& Name
== "avx512.kandn.w") {
2470 Value
*LHS
= getX86MaskVec(Builder
, CI
->getArgOperand(0), 16);
2471 Value
*RHS
= getX86MaskVec(Builder
, CI
->getArgOperand(1), 16);
2472 LHS
= Builder
.CreateNot(LHS
);
2473 Rep
= Builder
.CreateAnd(LHS
, RHS
);
2474 Rep
= Builder
.CreateBitCast(Rep
, CI
->getType());
2475 } else if (IsX86
&& Name
== "avx512.kor.w") {
2476 Value
*LHS
= getX86MaskVec(Builder
, CI
->getArgOperand(0), 16);
2477 Value
*RHS
= getX86MaskVec(Builder
, CI
->getArgOperand(1), 16);
2478 Rep
= Builder
.CreateOr(LHS
, RHS
);
2479 Rep
= Builder
.CreateBitCast(Rep
, CI
->getType());
2480 } else if (IsX86
&& Name
== "avx512.kxor.w") {
2481 Value
*LHS
= getX86MaskVec(Builder
, CI
->getArgOperand(0), 16);
2482 Value
*RHS
= getX86MaskVec(Builder
, CI
->getArgOperand(1), 16);
2483 Rep
= Builder
.CreateXor(LHS
, RHS
);
2484 Rep
= Builder
.CreateBitCast(Rep
, CI
->getType());
2485 } else if (IsX86
&& Name
== "avx512.kxnor.w") {
2486 Value
*LHS
= getX86MaskVec(Builder
, CI
->getArgOperand(0), 16);
2487 Value
*RHS
= getX86MaskVec(Builder
, CI
->getArgOperand(1), 16);
2488 LHS
= Builder
.CreateNot(LHS
);
2489 Rep
= Builder
.CreateXor(LHS
, RHS
);
2490 Rep
= Builder
.CreateBitCast(Rep
, CI
->getType());
2491 } else if (IsX86
&& Name
== "avx512.knot.w") {
2492 Rep
= getX86MaskVec(Builder
, CI
->getArgOperand(0), 16);
2493 Rep
= Builder
.CreateNot(Rep
);
2494 Rep
= Builder
.CreateBitCast(Rep
, CI
->getType());
2496 (Name
== "avx512.kortestz.w" || Name
== "avx512.kortestc.w")) {
2497 Value
*LHS
= getX86MaskVec(Builder
, CI
->getArgOperand(0), 16);
2498 Value
*RHS
= getX86MaskVec(Builder
, CI
->getArgOperand(1), 16);
2499 Rep
= Builder
.CreateOr(LHS
, RHS
);
2500 Rep
= Builder
.CreateBitCast(Rep
, Builder
.getInt16Ty());
2502 if (Name
[14] == 'c')
2503 C
= ConstantInt::getAllOnesValue(Builder
.getInt16Ty());
2505 C
= ConstantInt::getNullValue(Builder
.getInt16Ty());
2506 Rep
= Builder
.CreateICmpEQ(Rep
, C
);
2507 Rep
= Builder
.CreateZExt(Rep
, Builder
.getInt32Ty());
2508 } else if (IsX86
&& (Name
== "sse.add.ss" || Name
== "sse2.add.sd" ||
2509 Name
== "sse.sub.ss" || Name
== "sse2.sub.sd" ||
2510 Name
== "sse.mul.ss" || Name
== "sse2.mul.sd" ||
2511 Name
== "sse.div.ss" || Name
== "sse2.div.sd")) {
2512 Type
*I32Ty
= Type::getInt32Ty(C
);
2513 Value
*Elt0
= Builder
.CreateExtractElement(CI
->getArgOperand(0),
2514 ConstantInt::get(I32Ty
, 0));
2515 Value
*Elt1
= Builder
.CreateExtractElement(CI
->getArgOperand(1),
2516 ConstantInt::get(I32Ty
, 0));
2518 if (Name
.contains(".add."))
2519 EltOp
= Builder
.CreateFAdd(Elt0
, Elt1
);
2520 else if (Name
.contains(".sub."))
2521 EltOp
= Builder
.CreateFSub(Elt0
, Elt1
);
2522 else if (Name
.contains(".mul."))
2523 EltOp
= Builder
.CreateFMul(Elt0
, Elt1
);
2525 EltOp
= Builder
.CreateFDiv(Elt0
, Elt1
);
2526 Rep
= Builder
.CreateInsertElement(CI
->getArgOperand(0), EltOp
,
2527 ConstantInt::get(I32Ty
, 0));
2528 } else if (IsX86
&& Name
.startswith("avx512.mask.pcmp")) {
2529 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2530 bool CmpEq
= Name
[16] == 'e';
2531 Rep
= upgradeMaskedCompare(Builder
, *CI
, CmpEq
? 0 : 6, true);
2532 } else if (IsX86
&& Name
.startswith("avx512.mask.vpshufbitqmb.")) {
2533 Type
*OpTy
= CI
->getArgOperand(0)->getType();
2534 unsigned VecWidth
= OpTy
->getPrimitiveSizeInBits();
2537 default: llvm_unreachable("Unexpected intrinsic");
2538 case 128: IID
= Intrinsic::x86_avx512_vpshufbitqmb_128
; break;
2539 case 256: IID
= Intrinsic::x86_avx512_vpshufbitqmb_256
; break;
2540 case 512: IID
= Intrinsic::x86_avx512_vpshufbitqmb_512
; break;
2543 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(), IID
),
2544 { CI
->getOperand(0), CI
->getArgOperand(1) });
2545 Rep
= ApplyX86MaskOn1BitsVec(Builder
, Rep
, CI
->getArgOperand(2));
2546 } else if (IsX86
&& Name
.startswith("avx512.mask.fpclass.p")) {
2547 Type
*OpTy
= CI
->getArgOperand(0)->getType();
2548 unsigned VecWidth
= OpTy
->getPrimitiveSizeInBits();
2549 unsigned EltWidth
= OpTy
->getScalarSizeInBits();
2551 if (VecWidth
== 128 && EltWidth
== 32)
2552 IID
= Intrinsic::x86_avx512_fpclass_ps_128
;
2553 else if (VecWidth
== 256 && EltWidth
== 32)
2554 IID
= Intrinsic::x86_avx512_fpclass_ps_256
;
2555 else if (VecWidth
== 512 && EltWidth
== 32)
2556 IID
= Intrinsic::x86_avx512_fpclass_ps_512
;
2557 else if (VecWidth
== 128 && EltWidth
== 64)
2558 IID
= Intrinsic::x86_avx512_fpclass_pd_128
;
2559 else if (VecWidth
== 256 && EltWidth
== 64)
2560 IID
= Intrinsic::x86_avx512_fpclass_pd_256
;
2561 else if (VecWidth
== 512 && EltWidth
== 64)
2562 IID
= Intrinsic::x86_avx512_fpclass_pd_512
;
2564 llvm_unreachable("Unexpected intrinsic");
2566 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(), IID
),
2567 { CI
->getOperand(0), CI
->getArgOperand(1) });
2568 Rep
= ApplyX86MaskOn1BitsVec(Builder
, Rep
, CI
->getArgOperand(2));
2569 } else if (IsX86
&& Name
.startswith("avx512.cmp.p")) {
2570 SmallVector
<Value
*, 4> Args(CI
->args());
2571 Type
*OpTy
= Args
[0]->getType();
2572 unsigned VecWidth
= OpTy
->getPrimitiveSizeInBits();
2573 unsigned EltWidth
= OpTy
->getScalarSizeInBits();
2575 if (VecWidth
== 128 && EltWidth
== 32)
2576 IID
= Intrinsic::x86_avx512_mask_cmp_ps_128
;
2577 else if (VecWidth
== 256 && EltWidth
== 32)
2578 IID
= Intrinsic::x86_avx512_mask_cmp_ps_256
;
2579 else if (VecWidth
== 512 && EltWidth
== 32)
2580 IID
= Intrinsic::x86_avx512_mask_cmp_ps_512
;
2581 else if (VecWidth
== 128 && EltWidth
== 64)
2582 IID
= Intrinsic::x86_avx512_mask_cmp_pd_128
;
2583 else if (VecWidth
== 256 && EltWidth
== 64)
2584 IID
= Intrinsic::x86_avx512_mask_cmp_pd_256
;
2585 else if (VecWidth
== 512 && EltWidth
== 64)
2586 IID
= Intrinsic::x86_avx512_mask_cmp_pd_512
;
2588 llvm_unreachable("Unexpected intrinsic");
2590 Value
*Mask
= Constant::getAllOnesValue(CI
->getType());
2591 if (VecWidth
== 512)
2592 std::swap(Mask
, Args
.back());
2593 Args
.push_back(Mask
);
2595 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(), IID
),
2597 } else if (IsX86
&& Name
.startswith("avx512.mask.cmp.")) {
2598 // Integer compare intrinsics.
2599 unsigned Imm
= cast
<ConstantInt
>(CI
->getArgOperand(2))->getZExtValue();
2600 Rep
= upgradeMaskedCompare(Builder
, *CI
, Imm
, true);
2601 } else if (IsX86
&& Name
.startswith("avx512.mask.ucmp.")) {
2602 unsigned Imm
= cast
<ConstantInt
>(CI
->getArgOperand(2))->getZExtValue();
2603 Rep
= upgradeMaskedCompare(Builder
, *CI
, Imm
, false);
2604 } else if (IsX86
&& (Name
.startswith("avx512.cvtb2mask.") ||
2605 Name
.startswith("avx512.cvtw2mask.") ||
2606 Name
.startswith("avx512.cvtd2mask.") ||
2607 Name
.startswith("avx512.cvtq2mask."))) {
2608 Value
*Op
= CI
->getArgOperand(0);
2609 Value
*Zero
= llvm::Constant::getNullValue(Op
->getType());
2610 Rep
= Builder
.CreateICmp(ICmpInst::ICMP_SLT
, Op
, Zero
);
2611 Rep
= ApplyX86MaskOn1BitsVec(Builder
, Rep
, nullptr);
2612 } else if(IsX86
&& (Name
== "ssse3.pabs.b.128" ||
2613 Name
== "ssse3.pabs.w.128" ||
2614 Name
== "ssse3.pabs.d.128" ||
2615 Name
.startswith("avx2.pabs") ||
2616 Name
.startswith("avx512.mask.pabs"))) {
2617 Rep
= upgradeAbs(Builder
, *CI
);
2618 } else if (IsX86
&& (Name
== "sse41.pmaxsb" ||
2619 Name
== "sse2.pmaxs.w" ||
2620 Name
== "sse41.pmaxsd" ||
2621 Name
.startswith("avx2.pmaxs") ||
2622 Name
.startswith("avx512.mask.pmaxs"))) {
2623 Rep
= UpgradeX86BinaryIntrinsics(Builder
, *CI
, Intrinsic::smax
);
2624 } else if (IsX86
&& (Name
== "sse2.pmaxu.b" ||
2625 Name
== "sse41.pmaxuw" ||
2626 Name
== "sse41.pmaxud" ||
2627 Name
.startswith("avx2.pmaxu") ||
2628 Name
.startswith("avx512.mask.pmaxu"))) {
2629 Rep
= UpgradeX86BinaryIntrinsics(Builder
, *CI
, Intrinsic::umax
);
2630 } else if (IsX86
&& (Name
== "sse41.pminsb" ||
2631 Name
== "sse2.pmins.w" ||
2632 Name
== "sse41.pminsd" ||
2633 Name
.startswith("avx2.pmins") ||
2634 Name
.startswith("avx512.mask.pmins"))) {
2635 Rep
= UpgradeX86BinaryIntrinsics(Builder
, *CI
, Intrinsic::smin
);
2636 } else if (IsX86
&& (Name
== "sse2.pminu.b" ||
2637 Name
== "sse41.pminuw" ||
2638 Name
== "sse41.pminud" ||
2639 Name
.startswith("avx2.pminu") ||
2640 Name
.startswith("avx512.mask.pminu"))) {
2641 Rep
= UpgradeX86BinaryIntrinsics(Builder
, *CI
, Intrinsic::umin
);
2642 } else if (IsX86
&& (Name
== "sse2.pmulu.dq" ||
2643 Name
== "avx2.pmulu.dq" ||
2644 Name
== "avx512.pmulu.dq.512" ||
2645 Name
.startswith("avx512.mask.pmulu.dq."))) {
2646 Rep
= upgradePMULDQ(Builder
, *CI
, /*Signed*/false);
2647 } else if (IsX86
&& (Name
== "sse41.pmuldq" ||
2648 Name
== "avx2.pmul.dq" ||
2649 Name
== "avx512.pmul.dq.512" ||
2650 Name
.startswith("avx512.mask.pmul.dq."))) {
2651 Rep
= upgradePMULDQ(Builder
, *CI
, /*Signed*/true);
2652 } else if (IsX86
&& (Name
== "sse.cvtsi2ss" ||
2653 Name
== "sse2.cvtsi2sd" ||
2654 Name
== "sse.cvtsi642ss" ||
2655 Name
== "sse2.cvtsi642sd")) {
2656 Rep
= Builder
.CreateSIToFP(
2657 CI
->getArgOperand(1),
2658 cast
<VectorType
>(CI
->getType())->getElementType());
2659 Rep
= Builder
.CreateInsertElement(CI
->getArgOperand(0), Rep
, (uint64_t)0);
2660 } else if (IsX86
&& Name
== "avx512.cvtusi2sd") {
2661 Rep
= Builder
.CreateUIToFP(
2662 CI
->getArgOperand(1),
2663 cast
<VectorType
>(CI
->getType())->getElementType());
2664 Rep
= Builder
.CreateInsertElement(CI
->getArgOperand(0), Rep
, (uint64_t)0);
2665 } else if (IsX86
&& Name
== "sse2.cvtss2sd") {
2666 Rep
= Builder
.CreateExtractElement(CI
->getArgOperand(1), (uint64_t)0);
2667 Rep
= Builder
.CreateFPExt(
2668 Rep
, cast
<VectorType
>(CI
->getType())->getElementType());
2669 Rep
= Builder
.CreateInsertElement(CI
->getArgOperand(0), Rep
, (uint64_t)0);
2670 } else if (IsX86
&& (Name
== "sse2.cvtdq2pd" ||
2671 Name
== "sse2.cvtdq2ps" ||
2672 Name
== "avx.cvtdq2.pd.256" ||
2673 Name
== "avx.cvtdq2.ps.256" ||
2674 Name
.startswith("avx512.mask.cvtdq2pd.") ||
2675 Name
.startswith("avx512.mask.cvtudq2pd.") ||
2676 Name
.startswith("avx512.mask.cvtdq2ps.") ||
2677 Name
.startswith("avx512.mask.cvtudq2ps.") ||
2678 Name
.startswith("avx512.mask.cvtqq2pd.") ||
2679 Name
.startswith("avx512.mask.cvtuqq2pd.") ||
2680 Name
== "avx512.mask.cvtqq2ps.256" ||
2681 Name
== "avx512.mask.cvtqq2ps.512" ||
2682 Name
== "avx512.mask.cvtuqq2ps.256" ||
2683 Name
== "avx512.mask.cvtuqq2ps.512" ||
2684 Name
== "sse2.cvtps2pd" ||
2685 Name
== "avx.cvt.ps2.pd.256" ||
2686 Name
== "avx512.mask.cvtps2pd.128" ||
2687 Name
== "avx512.mask.cvtps2pd.256")) {
2688 auto *DstTy
= cast
<FixedVectorType
>(CI
->getType());
2689 Rep
= CI
->getArgOperand(0);
2690 auto *SrcTy
= cast
<FixedVectorType
>(Rep
->getType());
2692 unsigned NumDstElts
= DstTy
->getNumElements();
2693 if (NumDstElts
< SrcTy
->getNumElements()) {
2694 assert(NumDstElts
== 2 && "Unexpected vector size");
2695 Rep
= Builder
.CreateShuffleVector(Rep
, Rep
, ArrayRef
<int>{0, 1});
2698 bool IsPS2PD
= SrcTy
->getElementType()->isFloatTy();
2699 bool IsUnsigned
= (StringRef::npos
!= Name
.find("cvtu"));
2701 Rep
= Builder
.CreateFPExt(Rep
, DstTy
, "cvtps2pd");
2702 else if (CI
->arg_size() == 4 &&
2703 (!isa
<ConstantInt
>(CI
->getArgOperand(3)) ||
2704 cast
<ConstantInt
>(CI
->getArgOperand(3))->getZExtValue() != 4)) {
2705 Intrinsic::ID IID
= IsUnsigned
? Intrinsic::x86_avx512_uitofp_round
2706 : Intrinsic::x86_avx512_sitofp_round
;
2707 Function
*F
= Intrinsic::getDeclaration(CI
->getModule(), IID
,
2709 Rep
= Builder
.CreateCall(F
, { Rep
, CI
->getArgOperand(3) });
2711 Rep
= IsUnsigned
? Builder
.CreateUIToFP(Rep
, DstTy
, "cvt")
2712 : Builder
.CreateSIToFP(Rep
, DstTy
, "cvt");
2715 if (CI
->arg_size() >= 3)
2716 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(2), Rep
,
2717 CI
->getArgOperand(1));
2718 } else if (IsX86
&& (Name
.startswith("avx512.mask.vcvtph2ps.") ||
2719 Name
.startswith("vcvtph2ps."))) {
2720 auto *DstTy
= cast
<FixedVectorType
>(CI
->getType());
2721 Rep
= CI
->getArgOperand(0);
2722 auto *SrcTy
= cast
<FixedVectorType
>(Rep
->getType());
2723 unsigned NumDstElts
= DstTy
->getNumElements();
2724 if (NumDstElts
!= SrcTy
->getNumElements()) {
2725 assert(NumDstElts
== 4 && "Unexpected vector size");
2726 Rep
= Builder
.CreateShuffleVector(Rep
, Rep
, ArrayRef
<int>{0, 1, 2, 3});
2728 Rep
= Builder
.CreateBitCast(
2729 Rep
, FixedVectorType::get(Type::getHalfTy(C
), NumDstElts
));
2730 Rep
= Builder
.CreateFPExt(Rep
, DstTy
, "cvtph2ps");
2731 if (CI
->arg_size() >= 3)
2732 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(2), Rep
,
2733 CI
->getArgOperand(1));
2734 } else if (IsX86
&& Name
.startswith("avx512.mask.load")) {
2735 // "avx512.mask.loadu." or "avx512.mask.load."
2736 bool Aligned
= Name
[16] != 'u'; // "avx512.mask.loadu".
2738 UpgradeMaskedLoad(Builder
, CI
->getArgOperand(0), CI
->getArgOperand(1),
2739 CI
->getArgOperand(2), Aligned
);
2740 } else if (IsX86
&& Name
.startswith("avx512.mask.expand.load.")) {
2741 auto *ResultTy
= cast
<FixedVectorType
>(CI
->getType());
2742 Type
*PtrTy
= ResultTy
->getElementType();
2744 // Cast the pointer to element type.
2745 Value
*Ptr
= Builder
.CreateBitCast(CI
->getOperand(0),
2746 llvm::PointerType::getUnqual(PtrTy
));
2748 Value
*MaskVec
= getX86MaskVec(Builder
, CI
->getArgOperand(2),
2749 ResultTy
->getNumElements());
2751 Function
*ELd
= Intrinsic::getDeclaration(F
->getParent(),
2752 Intrinsic::masked_expandload
,
2754 Rep
= Builder
.CreateCall(ELd
, { Ptr
, MaskVec
, CI
->getOperand(1) });
2755 } else if (IsX86
&& Name
.startswith("avx512.mask.compress.store.")) {
2756 auto *ResultTy
= cast
<VectorType
>(CI
->getArgOperand(1)->getType());
2757 Type
*PtrTy
= ResultTy
->getElementType();
2759 // Cast the pointer to element type.
2760 Value
*Ptr
= Builder
.CreateBitCast(CI
->getOperand(0),
2761 llvm::PointerType::getUnqual(PtrTy
));
2764 getX86MaskVec(Builder
, CI
->getArgOperand(2),
2765 cast
<FixedVectorType
>(ResultTy
)->getNumElements());
2767 Function
*CSt
= Intrinsic::getDeclaration(F
->getParent(),
2768 Intrinsic::masked_compressstore
,
2770 Rep
= Builder
.CreateCall(CSt
, { CI
->getArgOperand(1), Ptr
, MaskVec
});
2771 } else if (IsX86
&& (Name
.startswith("avx512.mask.compress.") ||
2772 Name
.startswith("avx512.mask.expand."))) {
2773 auto *ResultTy
= cast
<FixedVectorType
>(CI
->getType());
2775 Value
*MaskVec
= getX86MaskVec(Builder
, CI
->getArgOperand(2),
2776 ResultTy
->getNumElements());
2778 bool IsCompress
= Name
[12] == 'c';
2779 Intrinsic::ID IID
= IsCompress
? Intrinsic::x86_avx512_mask_compress
2780 : Intrinsic::x86_avx512_mask_expand
;
2781 Function
*Intr
= Intrinsic::getDeclaration(F
->getParent(), IID
, ResultTy
);
2782 Rep
= Builder
.CreateCall(Intr
, { CI
->getOperand(0), CI
->getOperand(1),
2784 } else if (IsX86
&& Name
.startswith("xop.vpcom")) {
2786 if (Name
.endswith("ub") || Name
.endswith("uw") || Name
.endswith("ud") ||
2787 Name
.endswith("uq"))
2789 else if (Name
.endswith("b") || Name
.endswith("w") || Name
.endswith("d") ||
2793 llvm_unreachable("Unknown suffix");
2796 if (CI
->arg_size() == 3) {
2797 Imm
= cast
<ConstantInt
>(CI
->getArgOperand(2))->getZExtValue();
2799 Name
= Name
.substr(9); // strip off "xop.vpcom"
2800 if (Name
.startswith("lt"))
2802 else if (Name
.startswith("le"))
2804 else if (Name
.startswith("gt"))
2806 else if (Name
.startswith("ge"))
2808 else if (Name
.startswith("eq"))
2810 else if (Name
.startswith("ne"))
2812 else if (Name
.startswith("false"))
2814 else if (Name
.startswith("true"))
2817 llvm_unreachable("Unknown condition");
2820 Rep
= upgradeX86vpcom(Builder
, *CI
, Imm
, IsSigned
);
2821 } else if (IsX86
&& Name
.startswith("xop.vpcmov")) {
2822 Value
*Sel
= CI
->getArgOperand(2);
2823 Value
*NotSel
= Builder
.CreateNot(Sel
);
2824 Value
*Sel0
= Builder
.CreateAnd(CI
->getArgOperand(0), Sel
);
2825 Value
*Sel1
= Builder
.CreateAnd(CI
->getArgOperand(1), NotSel
);
2826 Rep
= Builder
.CreateOr(Sel0
, Sel1
);
2827 } else if (IsX86
&& (Name
.startswith("xop.vprot") ||
2828 Name
.startswith("avx512.prol") ||
2829 Name
.startswith("avx512.mask.prol"))) {
2830 Rep
= upgradeX86Rotate(Builder
, *CI
, false);
2831 } else if (IsX86
&& (Name
.startswith("avx512.pror") ||
2832 Name
.startswith("avx512.mask.pror"))) {
2833 Rep
= upgradeX86Rotate(Builder
, *CI
, true);
2834 } else if (IsX86
&& (Name
.startswith("avx512.vpshld.") ||
2835 Name
.startswith("avx512.mask.vpshld") ||
2836 Name
.startswith("avx512.maskz.vpshld"))) {
2837 bool ZeroMask
= Name
[11] == 'z';
2838 Rep
= upgradeX86ConcatShift(Builder
, *CI
, false, ZeroMask
);
2839 } else if (IsX86
&& (Name
.startswith("avx512.vpshrd.") ||
2840 Name
.startswith("avx512.mask.vpshrd") ||
2841 Name
.startswith("avx512.maskz.vpshrd"))) {
2842 bool ZeroMask
= Name
[11] == 'z';
2843 Rep
= upgradeX86ConcatShift(Builder
, *CI
, true, ZeroMask
);
2844 } else if (IsX86
&& Name
== "sse42.crc32.64.8") {
2845 Function
*CRC32
= Intrinsic::getDeclaration(F
->getParent(),
2846 Intrinsic::x86_sse42_crc32_32_8
);
2847 Value
*Trunc0
= Builder
.CreateTrunc(CI
->getArgOperand(0), Type::getInt32Ty(C
));
2848 Rep
= Builder
.CreateCall(CRC32
, {Trunc0
, CI
->getArgOperand(1)});
2849 Rep
= Builder
.CreateZExt(Rep
, CI
->getType(), "");
2850 } else if (IsX86
&& (Name
.startswith("avx.vbroadcast.s") ||
2851 Name
.startswith("avx512.vbroadcast.s"))) {
2852 // Replace broadcasts with a series of insertelements.
2853 auto *VecTy
= cast
<FixedVectorType
>(CI
->getType());
2854 Type
*EltTy
= VecTy
->getElementType();
2855 unsigned EltNum
= VecTy
->getNumElements();
2856 Value
*Load
= Builder
.CreateLoad(EltTy
, CI
->getArgOperand(0));
2857 Type
*I32Ty
= Type::getInt32Ty(C
);
2858 Rep
= PoisonValue::get(VecTy
);
2859 for (unsigned I
= 0; I
< EltNum
; ++I
)
2860 Rep
= Builder
.CreateInsertElement(Rep
, Load
,
2861 ConstantInt::get(I32Ty
, I
));
    } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
                         Name.startswith("sse41.pmovzx") ||
                         Name.startswith("avx2.pmovsx") ||
                         Name.startswith("avx2.pmovzx") ||
                         Name.startswith("avx512.mask.pmovsx") ||
                         Name.startswith("avx512.mask.pmovzx"))) {
      auto *DstTy = cast<FixedVectorType>(CI->getType());
      unsigned NumDstElts = DstTy->getNumElements();

      // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
      SmallVector<int, 8> ShuffleMask(NumDstElts);
      for (unsigned i = 0; i != NumDstElts; ++i)
        ShuffleMask[i] = i;

      Value *SV =
          Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);

      bool DoSext = (StringRef::npos != Name.find("pmovsx"));
      Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
                   : Builder.CreateZExt(SV, DstTy);
      // If there are 3 arguments, it's a masked intrinsic so we need a select.
      if (CI->arg_size() == 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
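      // For example, sse41.pmovsxbw on a <16 x i8> source becomes a
      // shufflevector that keeps the first eight lanes followed by a sext to
      // <8 x i16>; the pmovzx variants use zext instead.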
    } else if (Name == "avx512.mask.pmov.qd.256" ||
               Name == "avx512.mask.pmov.qd.512" ||
               Name == "avx512.mask.pmov.wb.256" ||
               Name == "avx512.mask.pmov.wb.512") {
      Type *Ty = CI->getArgOperand(1)->getType();
      Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
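      // For example, avx512.mask.pmov.qd.512 truncates the <8 x i64> operand
      // to <8 x i32> and then selects between the truncated value and the
      // pass-through operand under the mask.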
    } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
                         Name == "avx2.vbroadcasti128")) {
      // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
      Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
      unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
      auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
      Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                            PointerType::getUnqual(VT));
      Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
      if (NumSrcElts == 2)
        Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
      else
        Rep = Builder.CreateShuffleVector(
            Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
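      // For example, avx.vbroadcastf128.ps.256 becomes an align-1 load of a
      // <4 x float> followed by a shufflevector with mask
      // <0, 1, 2, 3, 0, 1, 2, 3>, repeating the 128-bit lane into both halves
      // of the 256-bit result.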
    } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
                         Name.startswith("avx512.mask.shuf.f"))) {
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Type *VT = CI->getType();
      unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
      unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
      unsigned ControlBitsMask = NumLanes - 1;
      unsigned NumControlBits = NumLanes / 2;
      SmallVector<int, 8> ShuffleMask(0);

      for (unsigned l = 0; l != NumLanes; ++l) {
        unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
        // We actually need the other source.
        if (l >= NumLanes / 2)
          LaneMask += NumLanes;
        for (unsigned i = 0; i != NumElementsInLane; ++i)
          ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
      }
      Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                        CI->getArgOperand(1), ShuffleMask);
      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
    } else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
                         Name.startswith("avx512.mask.broadcasti"))) {
      unsigned NumSrcElts =
          cast<FixedVectorType>(CI->getArgOperand(0)->getType())
              ->getNumElements();
      unsigned NumDstElts =
          cast<FixedVectorType>(CI->getType())->getNumElements();

      SmallVector<int, 8> ShuffleMask(NumDstElts);
      for (unsigned i = 0; i != NumDstElts; ++i)
        ShuffleMask[i] = i % NumSrcElts;

      Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                        CI->getArgOperand(0),
                                        ShuffleMask);
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
                         Name.startswith("avx2.vbroadcast") ||
                         Name.startswith("avx512.pbroadcast") ||
                         Name.startswith("avx512.mask.broadcast.s"))) {
      // Replace vp?broadcasts with a vector shuffle.
      Value *Op = CI->getArgOperand(0);
      ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
      Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
      SmallVector<int, 8> M;
      ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
      Rep = Builder.CreateShuffleVector(Op, M);

      if (CI->arg_size() == 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("sse2.padds.") ||
                         Name.startswith("avx2.padds.") ||
                         Name.startswith("avx512.padds.") ||
                         Name.startswith("avx512.mask.padds."))) {
      Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
    } else if (IsX86 && (Name.startswith("sse2.psubs.") ||
                         Name.startswith("avx2.psubs.") ||
                         Name.startswith("avx512.psubs.") ||
                         Name.startswith("avx512.mask.psubs."))) {
      Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
    } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
                         Name.startswith("avx2.paddus.") ||
                         Name.startswith("avx512.mask.paddus."))) {
      Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
    } else if (IsX86 && (Name.startswith("sse2.psubus.") ||
                         Name.startswith("avx2.psubus.") ||
                         Name.startswith("avx512.mask.psubus."))) {
      Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
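      // These saturating-arithmetic names all map onto the generic saturating
      // intrinsics; for example, llvm.x86.sse2.padds.b on <16 x i8> operands
      // becomes llvm.sadd.sat.v16i8, and the masked AVX-512 forms get the
      // same rewrite plus a mask select inside the helper.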
    } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
                                      CI->getArgOperand(1),
                                      CI->getArgOperand(2),
                                      CI->getArgOperand(3),
                                      CI->getArgOperand(4),
                                      false);
    } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
                                      CI->getArgOperand(1),
                                      CI->getArgOperand(2),
                                      CI->getArgOperand(3),
                                      CI->getArgOperand(4),
                                      true);
    } else if (IsX86 && (Name == "sse2.psll.dq" ||
                         Name == "avx2.psll.dq")) {
      // 128/256-bit shift left specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (IsX86 && (Name == "sse2.psrl.dq" ||
                         Name == "avx2.psrl.dq")) {
      // 128/256-bit shift right specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
                         Name == "avx2.psll.dq.bs" ||
                         Name == "avx512.psll.dq.512")) {
      // 128/256/512-bit shift left specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
    } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
                         Name == "avx2.psrl.dq.bs" ||
                         Name == "avx512.psrl.dq.512")) {
      // 128/256/512-bit shift right specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
    } else if (IsX86 && (Name == "sse41.pblendw" ||
                         Name.startswith("sse41.blendp") ||
                         Name.startswith("avx.blend.p") ||
                         Name == "avx2.pblendw" ||
                         Name.startswith("avx2.pblendd."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      auto *VecTy = cast<FixedVectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
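      // For example, sse41.pblendw on <8 x i16> with Imm = 0x0F yields the
      // shuffle mask <8, 9, 10, 11, 4, 5, 6, 7>: set immediate bits take the
      // corresponding element from Op1, clear bits keep the element from Op0.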
    } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
                         Name == "avx2.vinserti128" ||
                         Name.startswith("avx512.mask.insert"))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      unsigned DstNumElts =
          cast<FixedVectorType>(CI->getType())->getNumElements();
      unsigned SrcNumElts =
          cast<FixedVectorType>(Op1->getType())->getNumElements();
      unsigned Scale = DstNumElts / SrcNumElts;

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm % Scale;

      // Extend the second operand into a vector the size of the destination.
      SmallVector<int, 8> Idxs(DstNumElts);
      for (unsigned i = 0; i != SrcNumElts; ++i)
        Idxs[i] = i;
      for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
        Idxs[i] = SrcNumElts;
      Rep = Builder.CreateShuffleVector(Op1, Idxs);

      // Insert the second operand into the first operand.
      //
      // Note that there is no guarantee that instruction lowering will
      // actually produce a vinsertf128 instruction for the created shuffles.
      // In particular, the 0 immediate case involves no lane changes, so it
      // can be handled as a blend.
      //
      // Example of shuffle mask for 32-bit elements:
      // Imm = 1  <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
      // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >

      // First fill with the identity mask.
      for (unsigned i = 0; i != DstNumElts; ++i)
        Idxs[i] = i;
      // Then replace the elements where we need to insert.
      for (unsigned i = 0; i != SrcNumElts; ++i)
        Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
      Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);

      // If the intrinsic has a mask operand, handle that.
      if (CI->arg_size() == 5)
        Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                            CI->getArgOperand(3));
    } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
                         Name == "avx2.vextracti128" ||
                         Name.startswith("avx512.mask.vextract"))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned DstNumElts =
          cast<FixedVectorType>(CI->getType())->getNumElements();
      unsigned SrcNumElts =
          cast<FixedVectorType>(Op0->getType())->getNumElements();
      unsigned Scale = SrcNumElts / DstNumElts;

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm % Scale;

      // Get indexes for the subvector of the input vector.
      SmallVector<int, 8> Idxs(DstNumElts);
      for (unsigned i = 0; i != DstNumElts; ++i) {
        Idxs[i] = i + (Imm * DstNumElts);
      }
      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      // If the intrinsic has a mask operand, handle that.
      if (CI->arg_size() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (!IsX86 && Name == "stackprotectorcheck") {
      Rep = nullptr;
    } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
                         Name.startswith("avx512.mask.perm.di."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      auto *VecTy = cast<FixedVectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<int, 8> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->arg_size() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
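      // For example, with Imm = 0x1B (0b00011011) every group of four
      // elements is reversed, i.e. the shuffle mask is <3, 2, 1, 0> within
      // each group.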
    } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
                         Name == "avx2.vperm2i128")) {
      // The immediate permute control byte looks like this:
      // [1:0] - select 128 bits from sources for low half of destination
      // [2]   - ignore
      // [3]   - zero low half of destination
      // [5:4] - select 128 bits from sources for high half of destination
      // [6]   - ignore
      // [7]   - zero high half of destination

      uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();

      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
      unsigned HalfSize = NumElts / 2;
      SmallVector<int, 8> ShuffleMask(NumElts);

      // Determine which operand(s) are actually in use for this instruction.
      Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
      Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);

      // If needed, replace operands based on zero mask.
      V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
      V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;

      // Permute low half of result.
      unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
      for (unsigned i = 0; i < HalfSize; ++i)
        ShuffleMask[i] = StartIndex + i;

      // Permute high half of result.
      StartIndex = (Imm & 0x10) ? HalfSize : 0;
      for (unsigned i = 0; i < HalfSize; ++i)
        ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;

      Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
    } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
                         Name == "sse2.pshuf.d" ||
                         Name.startswith("avx512.mask.vpermil.p") ||
                         Name.startswith("avx512.mask.pshuf.d."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      auto *VecTy = cast<FixedVectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();
      // Calculate the size of each index in the immediate.
      unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
      unsigned IdxMask = ((1 << IdxSize) - 1);

      SmallVector<int, 8> Idxs(NumElts);
      // Lookup the bits for this element, wrapping around the immediate every
      // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
      // to offset by the first index of each group.
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->arg_size() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && (Name == "sse2.pshufl.w" ||
                         Name.startswith("avx512.mask.pshufl.w."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
        for (unsigned i = 4; i != 8; ++i)
          Idxs[i + l] = i + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->arg_size() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
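      // For example, sse2.pshufl.w with Imm = 0x1B on <8 x i16> produces the
      // shuffle mask <3, 2, 1, 0, 4, 5, 6, 7>: the low four words are
      // permuted by the immediate while the high four pass through unchanged.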
    } else if (IsX86 && (Name == "sse2.pshufh.w" ||
                         Name.startswith("avx512.mask.pshufh.w."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = i + l;
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->arg_size() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

      unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
      unsigned HalfLaneElts = NumLaneElts / 2;

      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        // Base index is the starting element of the lane.
        Idxs[i] = i - (i % NumLaneElts);
        // If we are half way through the lane switch to the other source.
        if ((i % NumLaneElts) >= HalfLaneElts)
          Idxs[i] += NumElts;
        // Now select the specific element by adding HalfLaneElts bits from
        // the immediate, wrapping around the immediate every 8 bits.
        Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
      }

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
    } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
                         Name.startswith("avx512.mask.movshdup") ||
                         Name.startswith("avx512.mask.movsldup"))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
      unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();

      unsigned Offset = 0;
      if (Name.startswith("avx512.mask.movshdup."))
        Offset = 1;

      SmallVector<int, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += NumLaneElts)
        for (unsigned i = 0; i != NumLaneElts; i += 2) {
          Idxs[i + l + 0] = i + l + Offset;
          Idxs[i + l + 1] = i + l + Offset;
        }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
                         Name.startswith("avx512.mask.unpckl."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
      int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();

      SmallVector<int, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
                         Name.startswith("avx512.mask.unpckh."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
      int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();

      SmallVector<int, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
                         Name.startswith("avx512.mask.pand."))) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
                         Name.startswith("avx512.mask.pandn."))) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
      Rep = Builder.CreateAnd(Rep,
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
                         Name.startswith("avx512.mask.por."))) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
                         Name.startswith("avx512.mask.pxor."))) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
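      // The .and.p*/.andn.p*/.or.p*/.xor.p* forms operate on FP vectors, and
      // LLVM IR has no bitwise operators on FP types, so the operands are
      // bitcast to an integer vector of the same width, combined, and bitcast
      // back before the mask select; for the integer p* forms the bitcasts
      // are no-ops.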
    } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
      Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
      Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
      Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
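      // For example, an avx512.mask.padd.d.* call becomes a plain vector add
      // followed by a select between the result and the pass-through operand
      // under the mask; psub and pmull map to sub and mul the same way.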
    } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_add_ps_512;
        else
          IID = Intrinsic::x86_avx512_add_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_div_ps_512;
        else
          IID = Intrinsic::x86_avx512_div_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_mul_ps_512;
        else
          IID = Intrinsic::x86_avx512_mul_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_sub_ps_512;
        else
          IID = Intrinsic::x86_avx512_sub_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
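      // The 512-bit forms carry a rounding-mode operand (operand 4), so they
      // are forwarded to the rounding-aware AVX-512 intrinsics; the
      // 128/256-bit forms become plain fadd/fdiv/fmul/fsub followed by the
      // mask select.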
    } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
                         Name.startswith("avx512.mask.min.p")) &&
               Name.drop_front(18) == ".512") {
      bool IsDouble = Name[17] == 'd';
      bool IsMin = Name[13] == 'i';
      static const Intrinsic::ID MinMaxTbl[2][2] = {
          { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
          { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
      };
      Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1),
                                 CI->getArgOperand(4) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                         Intrinsic::ctlz,
                                                         CI->getType()),
                               { CI->getArgOperand(0), Builder.getInt1(false) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
3436 } else if (IsX86
&& Name
.startswith("avx512.mask.psll")) {
3437 bool IsImmediate
= Name
[16] == 'i' ||
3438 (Name
.size() > 18 && Name
[18] == 'i');
3439 bool IsVariable
= Name
[16] == 'v';
3440 char Size
= Name
[16] == '.' ? Name
[17] :
3441 Name
[17] == '.' ? Name
[18] :
3442 Name
[18] == '.' ? Name
[19] :
3446 if (IsVariable
&& Name
[17] != '.') {
3447 if (Size
== 'd' && Name
[17] == '2') // avx512.mask.psllv2.di
3448 IID
= Intrinsic::x86_avx2_psllv_q
;
3449 else if (Size
== 'd' && Name
[17] == '4') // avx512.mask.psllv4.di
3450 IID
= Intrinsic::x86_avx2_psllv_q_256
;
3451 else if (Size
== 's' && Name
[17] == '4') // avx512.mask.psllv4.si
3452 IID
= Intrinsic::x86_avx2_psllv_d
;
3453 else if (Size
== 's' && Name
[17] == '8') // avx512.mask.psllv8.si
3454 IID
= Intrinsic::x86_avx2_psllv_d_256
;
3455 else if (Size
== 'h' && Name
[17] == '8') // avx512.mask.psllv8.hi
3456 IID
= Intrinsic::x86_avx512_psllv_w_128
;
3457 else if (Size
== 'h' && Name
[17] == '1') // avx512.mask.psllv16.hi
3458 IID
= Intrinsic::x86_avx512_psllv_w_256
;
3459 else if (Name
[17] == '3' && Name
[18] == '2') // avx512.mask.psllv32hi
3460 IID
= Intrinsic::x86_avx512_psllv_w_512
;
3462 llvm_unreachable("Unexpected size");
3463 } else if (Name
.endswith(".128")) {
3464 if (Size
== 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3465 IID
= IsImmediate
? Intrinsic::x86_sse2_pslli_d
3466 : Intrinsic::x86_sse2_psll_d
;
3467 else if (Size
== 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3468 IID
= IsImmediate
? Intrinsic::x86_sse2_pslli_q
3469 : Intrinsic::x86_sse2_psll_q
;
3470 else if (Size
== 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3471 IID
= IsImmediate
? Intrinsic::x86_sse2_pslli_w
3472 : Intrinsic::x86_sse2_psll_w
;
3474 llvm_unreachable("Unexpected size");
3475 } else if (Name
.endswith(".256")) {
3476 if (Size
== 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3477 IID
= IsImmediate
? Intrinsic::x86_avx2_pslli_d
3478 : Intrinsic::x86_avx2_psll_d
;
3479 else if (Size
== 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3480 IID
= IsImmediate
? Intrinsic::x86_avx2_pslli_q
3481 : Intrinsic::x86_avx2_psll_q
;
3482 else if (Size
== 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3483 IID
= IsImmediate
? Intrinsic::x86_avx2_pslli_w
3484 : Intrinsic::x86_avx2_psll_w
;
3486 llvm_unreachable("Unexpected size");
3488 if (Size
== 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3489 IID
= IsImmediate
? Intrinsic::x86_avx512_pslli_d_512
:
3490 IsVariable
? Intrinsic::x86_avx512_psllv_d_512
:
3491 Intrinsic::x86_avx512_psll_d_512
;
3492 else if (Size
== 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3493 IID
= IsImmediate
? Intrinsic::x86_avx512_pslli_q_512
:
3494 IsVariable
? Intrinsic::x86_avx512_psllv_q_512
:
3495 Intrinsic::x86_avx512_psll_q_512
;
3496 else if (Size
== 'w') // psll.wi.512, pslli.w, psll.w
3497 IID
= IsImmediate
? Intrinsic::x86_avx512_pslli_w_512
3498 : Intrinsic::x86_avx512_psll_w_512
;
3500 llvm_unreachable("Unexpected size");
3503 Rep
= UpgradeX86MaskedShift(Builder
, *CI
, IID
);
3504 } else if (IsX86
&& Name
.startswith("avx512.mask.psrl")) {
3505 bool IsImmediate
= Name
[16] == 'i' ||
3506 (Name
.size() > 18 && Name
[18] == 'i');
3507 bool IsVariable
= Name
[16] == 'v';
3508 char Size
= Name
[16] == '.' ? Name
[17] :
3509 Name
[17] == '.' ? Name
[18] :
3510 Name
[18] == '.' ? Name
[19] :
3514 if (IsVariable
&& Name
[17] != '.') {
3515 if (Size
== 'd' && Name
[17] == '2') // avx512.mask.psrlv2.di
3516 IID
= Intrinsic::x86_avx2_psrlv_q
;
3517 else if (Size
== 'd' && Name
[17] == '4') // avx512.mask.psrlv4.di
3518 IID
= Intrinsic::x86_avx2_psrlv_q_256
;
3519 else if (Size
== 's' && Name
[17] == '4') // avx512.mask.psrlv4.si
3520 IID
= Intrinsic::x86_avx2_psrlv_d
;
3521 else if (Size
== 's' && Name
[17] == '8') // avx512.mask.psrlv8.si
3522 IID
= Intrinsic::x86_avx2_psrlv_d_256
;
3523 else if (Size
== 'h' && Name
[17] == '8') // avx512.mask.psrlv8.hi
3524 IID
= Intrinsic::x86_avx512_psrlv_w_128
;
3525 else if (Size
== 'h' && Name
[17] == '1') // avx512.mask.psrlv16.hi
3526 IID
= Intrinsic::x86_avx512_psrlv_w_256
;
3527 else if (Name
[17] == '3' && Name
[18] == '2') // avx512.mask.psrlv32hi
3528 IID
= Intrinsic::x86_avx512_psrlv_w_512
;
3530 llvm_unreachable("Unexpected size");
3531 } else if (Name
.endswith(".128")) {
3532 if (Size
== 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3533 IID
= IsImmediate
? Intrinsic::x86_sse2_psrli_d
3534 : Intrinsic::x86_sse2_psrl_d
;
3535 else if (Size
== 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3536 IID
= IsImmediate
? Intrinsic::x86_sse2_psrli_q
3537 : Intrinsic::x86_sse2_psrl_q
;
3538 else if (Size
== 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3539 IID
= IsImmediate
? Intrinsic::x86_sse2_psrli_w
3540 : Intrinsic::x86_sse2_psrl_w
;
3542 llvm_unreachable("Unexpected size");
3543 } else if (Name
.endswith(".256")) {
3544 if (Size
== 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3545 IID
= IsImmediate
? Intrinsic::x86_avx2_psrli_d
3546 : Intrinsic::x86_avx2_psrl_d
;
3547 else if (Size
== 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3548 IID
= IsImmediate
? Intrinsic::x86_avx2_psrli_q
3549 : Intrinsic::x86_avx2_psrl_q
;
3550 else if (Size
== 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3551 IID
= IsImmediate
? Intrinsic::x86_avx2_psrli_w
3552 : Intrinsic::x86_avx2_psrl_w
;
3554 llvm_unreachable("Unexpected size");
3556 if (Size
== 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3557 IID
= IsImmediate
? Intrinsic::x86_avx512_psrli_d_512
:
3558 IsVariable
? Intrinsic::x86_avx512_psrlv_d_512
:
3559 Intrinsic::x86_avx512_psrl_d_512
;
3560 else if (Size
== 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3561 IID
= IsImmediate
? Intrinsic::x86_avx512_psrli_q_512
:
3562 IsVariable
? Intrinsic::x86_avx512_psrlv_q_512
:
3563 Intrinsic::x86_avx512_psrl_q_512
;
3564 else if (Size
== 'w') // psrl.wi.512, psrli.w, psrl.w)
3565 IID
= IsImmediate
? Intrinsic::x86_avx512_psrli_w_512
3566 : Intrinsic::x86_avx512_psrl_w_512
;
3568 llvm_unreachable("Unexpected size");
3571 Rep
= UpgradeX86MaskedShift(Builder
, *CI
, IID
);
3572 } else if (IsX86
&& Name
.startswith("avx512.mask.psra")) {
3573 bool IsImmediate
= Name
[16] == 'i' ||
3574 (Name
.size() > 18 && Name
[18] == 'i');
3575 bool IsVariable
= Name
[16] == 'v';
3576 char Size
= Name
[16] == '.' ? Name
[17] :
3577 Name
[17] == '.' ? Name
[18] :
3578 Name
[18] == '.' ? Name
[19] :
3582 if (IsVariable
&& Name
[17] != '.') {
3583 if (Size
== 's' && Name
[17] == '4') // avx512.mask.psrav4.si
3584 IID
= Intrinsic::x86_avx2_psrav_d
;
3585 else if (Size
== 's' && Name
[17] == '8') // avx512.mask.psrav8.si
3586 IID
= Intrinsic::x86_avx2_psrav_d_256
;
3587 else if (Size
== 'h' && Name
[17] == '8') // avx512.mask.psrav8.hi
3588 IID
= Intrinsic::x86_avx512_psrav_w_128
;
3589 else if (Size
== 'h' && Name
[17] == '1') // avx512.mask.psrav16.hi
3590 IID
= Intrinsic::x86_avx512_psrav_w_256
;
3591 else if (Name
[17] == '3' && Name
[18] == '2') // avx512.mask.psrav32hi
3592 IID
= Intrinsic::x86_avx512_psrav_w_512
;
3594 llvm_unreachable("Unexpected size");
3595 } else if (Name
.endswith(".128")) {
3596 if (Size
== 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3597 IID
= IsImmediate
? Intrinsic::x86_sse2_psrai_d
3598 : Intrinsic::x86_sse2_psra_d
;
3599 else if (Size
== 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3600 IID
= IsImmediate
? Intrinsic::x86_avx512_psrai_q_128
:
3601 IsVariable
? Intrinsic::x86_avx512_psrav_q_128
:
3602 Intrinsic::x86_avx512_psra_q_128
;
3603 else if (Size
== 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3604 IID
= IsImmediate
? Intrinsic::x86_sse2_psrai_w
3605 : Intrinsic::x86_sse2_psra_w
;
3607 llvm_unreachable("Unexpected size");
3608 } else if (Name
.endswith(".256")) {
3609 if (Size
== 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3610 IID
= IsImmediate
? Intrinsic::x86_avx2_psrai_d
3611 : Intrinsic::x86_avx2_psra_d
;
3612 else if (Size
== 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3613 IID
= IsImmediate
? Intrinsic::x86_avx512_psrai_q_256
:
3614 IsVariable
? Intrinsic::x86_avx512_psrav_q_256
:
3615 Intrinsic::x86_avx512_psra_q_256
;
3616 else if (Size
== 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3617 IID
= IsImmediate
? Intrinsic::x86_avx2_psrai_w
3618 : Intrinsic::x86_avx2_psra_w
;
3620 llvm_unreachable("Unexpected size");
3622 if (Size
== 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3623 IID
= IsImmediate
? Intrinsic::x86_avx512_psrai_d_512
:
3624 IsVariable
? Intrinsic::x86_avx512_psrav_d_512
:
3625 Intrinsic::x86_avx512_psra_d_512
;
3626 else if (Size
== 'q') // psra.qi.512, psrai.q, psra.q
3627 IID
= IsImmediate
? Intrinsic::x86_avx512_psrai_q_512
:
3628 IsVariable
? Intrinsic::x86_avx512_psrav_q_512
:
3629 Intrinsic::x86_avx512_psra_q_512
;
3630 else if (Size
== 'w') // psra.wi.512, psrai.w, psra.w
3631 IID
= IsImmediate
? Intrinsic::x86_avx512_psrai_w_512
3632 : Intrinsic::x86_avx512_psra_w_512
;
3634 llvm_unreachable("Unexpected size");
3637 Rep
= UpgradeX86MaskedShift(Builder
, *CI
, IID
);
    } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
      Rep = upgradeMaskedMove(Builder, *CI);
    } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
      Rep = UpgradeMaskToInt(Builder, *CI);
    } else if (IsX86 && Name.endswith(".movntdqa")) {
      MDNode *Node = MDNode::get(
          C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));

      Value *Ptr = CI->getArgOperand(0);

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC = Builder.CreateBitCast(
          Ptr, PointerType::getUnqual(CI->getType()), "cast");
      LoadInst *LI = Builder.CreateAlignedLoad(
          CI->getType(), BC,
          Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
      LI->setMetadata(LLVMContext::MD_nontemporal, Node);
      Rep = LI;
3656 } else if (IsX86
&& (Name
.startswith("fma.vfmadd.") ||
3657 Name
.startswith("fma.vfmsub.") ||
3658 Name
.startswith("fma.vfnmadd.") ||
3659 Name
.startswith("fma.vfnmsub."))) {
3660 bool NegMul
= Name
[6] == 'n';
3661 bool NegAcc
= NegMul
? Name
[8] == 's' : Name
[7] == 's';
3662 bool IsScalar
= NegMul
? Name
[12] == 's' : Name
[11] == 's';
3664 Value
*Ops
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3665 CI
->getArgOperand(2) };
3668 Ops
[0] = Builder
.CreateExtractElement(Ops
[0], (uint64_t)0);
3669 Ops
[1] = Builder
.CreateExtractElement(Ops
[1], (uint64_t)0);
3670 Ops
[2] = Builder
.CreateExtractElement(Ops
[2], (uint64_t)0);
3673 if (NegMul
&& !IsScalar
)
3674 Ops
[0] = Builder
.CreateFNeg(Ops
[0]);
3675 if (NegMul
&& IsScalar
)
3676 Ops
[1] = Builder
.CreateFNeg(Ops
[1]);
3678 Ops
[2] = Builder
.CreateFNeg(Ops
[2]);
3680 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(),
3686 Rep
= Builder
.CreateInsertElement(CI
->getArgOperand(0), Rep
,
3688 } else if (IsX86
&& Name
.startswith("fma4.vfmadd.s")) {
3689 Value
*Ops
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3690 CI
->getArgOperand(2) };
3692 Ops
[0] = Builder
.CreateExtractElement(Ops
[0], (uint64_t)0);
3693 Ops
[1] = Builder
.CreateExtractElement(Ops
[1], (uint64_t)0);
3694 Ops
[2] = Builder
.CreateExtractElement(Ops
[2], (uint64_t)0);
3696 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(),
3701 Rep
= Builder
.CreateInsertElement(Constant::getNullValue(CI
->getType()),
3703 } else if (IsX86
&& (Name
.startswith("avx512.mask.vfmadd.s") ||
3704 Name
.startswith("avx512.maskz.vfmadd.s") ||
3705 Name
.startswith("avx512.mask3.vfmadd.s") ||
3706 Name
.startswith("avx512.mask3.vfmsub.s") ||
3707 Name
.startswith("avx512.mask3.vfnmsub.s"))) {
3708 bool IsMask3
= Name
[11] == '3';
3709 bool IsMaskZ
= Name
[11] == 'z';
3710 // Drop the "avx512.mask." to make it easier.
3711 Name
= Name
.drop_front(IsMask3
|| IsMaskZ
? 13 : 12);
3712 bool NegMul
= Name
[2] == 'n';
3713 bool NegAcc
= NegMul
? Name
[4] == 's' : Name
[3] == 's';
3715 Value
*A
= CI
->getArgOperand(0);
3716 Value
*B
= CI
->getArgOperand(1);
3717 Value
*C
= CI
->getArgOperand(2);
3719 if (NegMul
&& (IsMask3
|| IsMaskZ
))
3720 A
= Builder
.CreateFNeg(A
);
3721 if (NegMul
&& !(IsMask3
|| IsMaskZ
))
3722 B
= Builder
.CreateFNeg(B
);
3724 C
= Builder
.CreateFNeg(C
);
3726 A
= Builder
.CreateExtractElement(A
, (uint64_t)0);
3727 B
= Builder
.CreateExtractElement(B
, (uint64_t)0);
3728 C
= Builder
.CreateExtractElement(C
, (uint64_t)0);
3730 if (!isa
<ConstantInt
>(CI
->getArgOperand(4)) ||
3731 cast
<ConstantInt
>(CI
->getArgOperand(4))->getZExtValue() != 4) {
3732 Value
*Ops
[] = { A
, B
, C
, CI
->getArgOperand(4) };
3735 if (Name
.back() == 'd')
3736 IID
= Intrinsic::x86_avx512_vfmadd_f64
;
3738 IID
= Intrinsic::x86_avx512_vfmadd_f32
;
3739 Function
*FMA
= Intrinsic::getDeclaration(CI
->getModule(), IID
);
3740 Rep
= Builder
.CreateCall(FMA
, Ops
);
3742 Function
*FMA
= Intrinsic::getDeclaration(CI
->getModule(),
3745 Rep
= Builder
.CreateCall(FMA
, { A
, B
, C
});
3748 Value
*PassThru
= IsMaskZ
? Constant::getNullValue(Rep
->getType()) :
3751 // For Mask3 with NegAcc, we need to create a new extractelement that
3752 // avoids the negation above.
3753 if (NegAcc
&& IsMask3
)
3754 PassThru
= Builder
.CreateExtractElement(CI
->getArgOperand(2),
3757 Rep
= EmitX86ScalarSelect(Builder
, CI
->getArgOperand(3),
3759 Rep
= Builder
.CreateInsertElement(CI
->getArgOperand(IsMask3
? 2 : 0),
3761 } else if (IsX86
&& (Name
.startswith("avx512.mask.vfmadd.p") ||
3762 Name
.startswith("avx512.mask.vfnmadd.p") ||
3763 Name
.startswith("avx512.mask.vfnmsub.p") ||
3764 Name
.startswith("avx512.mask3.vfmadd.p") ||
3765 Name
.startswith("avx512.mask3.vfmsub.p") ||
3766 Name
.startswith("avx512.mask3.vfnmsub.p") ||
3767 Name
.startswith("avx512.maskz.vfmadd.p"))) {
3768 bool IsMask3
= Name
[11] == '3';
3769 bool IsMaskZ
= Name
[11] == 'z';
3770 // Drop the "avx512.mask." to make it easier.
3771 Name
= Name
.drop_front(IsMask3
|| IsMaskZ
? 13 : 12);
3772 bool NegMul
= Name
[2] == 'n';
3773 bool NegAcc
= NegMul
? Name
[4] == 's' : Name
[3] == 's';
3775 Value
*A
= CI
->getArgOperand(0);
3776 Value
*B
= CI
->getArgOperand(1);
3777 Value
*C
= CI
->getArgOperand(2);
3779 if (NegMul
&& (IsMask3
|| IsMaskZ
))
3780 A
= Builder
.CreateFNeg(A
);
3781 if (NegMul
&& !(IsMask3
|| IsMaskZ
))
3782 B
= Builder
.CreateFNeg(B
);
3784 C
= Builder
.CreateFNeg(C
);
3786 if (CI
->arg_size() == 5 &&
3787 (!isa
<ConstantInt
>(CI
->getArgOperand(4)) ||
3788 cast
<ConstantInt
>(CI
->getArgOperand(4))->getZExtValue() != 4)) {
3790 // Check the character before ".512" in string.
3791 if (Name
[Name
.size()-5] == 's')
3792 IID
= Intrinsic::x86_avx512_vfmadd_ps_512
;
3794 IID
= Intrinsic::x86_avx512_vfmadd_pd_512
;
3796 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(), IID
),
3797 { A
, B
, C
, CI
->getArgOperand(4) });
3799 Function
*FMA
= Intrinsic::getDeclaration(CI
->getModule(),
3802 Rep
= Builder
.CreateCall(FMA
, { A
, B
, C
});
3805 Value
*PassThru
= IsMaskZ
? llvm::Constant::getNullValue(CI
->getType()) :
3806 IsMask3
? CI
->getArgOperand(2) :
3807 CI
->getArgOperand(0);
3809 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
, PassThru
);
3810 } else if (IsX86
&& Name
.startswith("fma.vfmsubadd.p")) {
3811 unsigned VecWidth
= CI
->getType()->getPrimitiveSizeInBits();
3812 unsigned EltWidth
= CI
->getType()->getScalarSizeInBits();
3814 if (VecWidth
== 128 && EltWidth
== 32)
3815 IID
= Intrinsic::x86_fma_vfmaddsub_ps
;
3816 else if (VecWidth
== 256 && EltWidth
== 32)
3817 IID
= Intrinsic::x86_fma_vfmaddsub_ps_256
;
3818 else if (VecWidth
== 128 && EltWidth
== 64)
3819 IID
= Intrinsic::x86_fma_vfmaddsub_pd
;
3820 else if (VecWidth
== 256 && EltWidth
== 64)
3821 IID
= Intrinsic::x86_fma_vfmaddsub_pd_256
;
3823 llvm_unreachable("Unexpected intrinsic");
3825 Value
*Ops
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3826 CI
->getArgOperand(2) };
3827 Ops
[2] = Builder
.CreateFNeg(Ops
[2]);
3828 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(), IID
),
3830 } else if (IsX86
&& (Name
.startswith("avx512.mask.vfmaddsub.p") ||
3831 Name
.startswith("avx512.mask3.vfmaddsub.p") ||
3832 Name
.startswith("avx512.maskz.vfmaddsub.p") ||
3833 Name
.startswith("avx512.mask3.vfmsubadd.p"))) {
3834 bool IsMask3
= Name
[11] == '3';
3835 bool IsMaskZ
= Name
[11] == 'z';
3836 // Drop the "avx512.mask." to make it easier.
3837 Name
= Name
.drop_front(IsMask3
|| IsMaskZ
? 13 : 12);
3838 bool IsSubAdd
= Name
[3] == 's';
3839 if (CI
->arg_size() == 5) {
3841 // Check the character before ".512" in string.
3842 if (Name
[Name
.size()-5] == 's')
3843 IID
= Intrinsic::x86_avx512_vfmaddsub_ps_512
;
3845 IID
= Intrinsic::x86_avx512_vfmaddsub_pd_512
;
3847 Value
*Ops
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3848 CI
->getArgOperand(2), CI
->getArgOperand(4) };
3850 Ops
[2] = Builder
.CreateFNeg(Ops
[2]);
3852 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(), IID
),
3855 int NumElts
= cast
<FixedVectorType
>(CI
->getType())->getNumElements();
3857 Value
*Ops
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3858 CI
->getArgOperand(2) };
3860 Function
*FMA
= Intrinsic::getDeclaration(CI
->getModule(), Intrinsic::fma
,
3862 Value
*Odd
= Builder
.CreateCall(FMA
, Ops
);
3863 Ops
[2] = Builder
.CreateFNeg(Ops
[2]);
3864 Value
*Even
= Builder
.CreateCall(FMA
, Ops
);
3867 std::swap(Even
, Odd
);
3869 SmallVector
<int, 32> Idxs(NumElts
);
3870 for (int i
= 0; i
!= NumElts
; ++i
)
3871 Idxs
[i
] = i
+ (i
% 2) * NumElts
;
3873 Rep
= Builder
.CreateShuffleVector(Even
, Odd
, Idxs
);
3876 Value
*PassThru
= IsMaskZ
? llvm::Constant::getNullValue(CI
->getType()) :
3877 IsMask3
? CI
->getArgOperand(2) :
3878 CI
->getArgOperand(0);
3880 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
, PassThru
);
    } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
                         Name.startswith("avx512.maskz.pternlog."))) {
      bool ZeroMask = Name[11] == 'z';
      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
      unsigned EltWidth = CI->getType()->getScalarSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_pternlog_d_128;
      else if (VecWidth == 256 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_pternlog_d_256;
      else if (VecWidth == 512 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_pternlog_d_512;
      else if (VecWidth == 128 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_pternlog_q_128;
      else if (VecWidth == 256 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_pternlog_q_256;
      else if (VecWidth == 512 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_pternlog_q_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                        CI->getArgOperand(2), CI->getArgOperand(3) };
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                               Args);
      Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                                 : CI->getArgOperand(0);
      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
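      // The fourth operand is the 8-bit truth table: each result bit is taken
      // from bit (a<<2 | b<<1 | c) of the immediate. For example, immediate
      // 0x96 implements a three-input XOR (a ^ b ^ c).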
3909 } else if (IsX86
&& (Name
.startswith("avx512.mask.vpmadd52") ||
3910 Name
.startswith("avx512.maskz.vpmadd52"))) {
3911 bool ZeroMask
= Name
[11] == 'z';
3912 bool High
= Name
[20] == 'h' || Name
[21] == 'h';
3913 unsigned VecWidth
= CI
->getType()->getPrimitiveSizeInBits();
3915 if (VecWidth
== 128 && !High
)
3916 IID
= Intrinsic::x86_avx512_vpmadd52l_uq_128
;
3917 else if (VecWidth
== 256 && !High
)
3918 IID
= Intrinsic::x86_avx512_vpmadd52l_uq_256
;
3919 else if (VecWidth
== 512 && !High
)
3920 IID
= Intrinsic::x86_avx512_vpmadd52l_uq_512
;
3921 else if (VecWidth
== 128 && High
)
3922 IID
= Intrinsic::x86_avx512_vpmadd52h_uq_128
;
3923 else if (VecWidth
== 256 && High
)
3924 IID
= Intrinsic::x86_avx512_vpmadd52h_uq_256
;
3925 else if (VecWidth
== 512 && High
)
3926 IID
= Intrinsic::x86_avx512_vpmadd52h_uq_512
;
3928 llvm_unreachable("Unexpected intrinsic");
3930 Value
*Args
[] = { CI
->getArgOperand(0) , CI
->getArgOperand(1),
3931 CI
->getArgOperand(2) };
3932 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(), IID
),
3934 Value
*PassThru
= ZeroMask
? ConstantAggregateZero::get(CI
->getType())
3935 : CI
->getArgOperand(0);
3936 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
, PassThru
);
    } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
                         Name.startswith("avx512.mask.vpermt2var.") ||
                         Name.startswith("avx512.maskz.vpermt2var."))) {
      bool ZeroMask = Name[11] == 'z';
      bool IndexForm = Name[17] == 'i';
      Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3943 } else if (IsX86
&& (Name
.startswith("avx512.mask.vpdpbusd.") ||
3944 Name
.startswith("avx512.maskz.vpdpbusd.") ||
3945 Name
.startswith("avx512.mask.vpdpbusds.") ||
3946 Name
.startswith("avx512.maskz.vpdpbusds."))) {
3947 bool ZeroMask
= Name
[11] == 'z';
3948 bool IsSaturating
= Name
[ZeroMask
? 21 : 20] == 's';
3949 unsigned VecWidth
= CI
->getType()->getPrimitiveSizeInBits();
3951 if (VecWidth
== 128 && !IsSaturating
)
3952 IID
= Intrinsic::x86_avx512_vpdpbusd_128
;
3953 else if (VecWidth
== 256 && !IsSaturating
)
3954 IID
= Intrinsic::x86_avx512_vpdpbusd_256
;
3955 else if (VecWidth
== 512 && !IsSaturating
)
3956 IID
= Intrinsic::x86_avx512_vpdpbusd_512
;
3957 else if (VecWidth
== 128 && IsSaturating
)
3958 IID
= Intrinsic::x86_avx512_vpdpbusds_128
;
3959 else if (VecWidth
== 256 && IsSaturating
)
3960 IID
= Intrinsic::x86_avx512_vpdpbusds_256
;
3961 else if (VecWidth
== 512 && IsSaturating
)
3962 IID
= Intrinsic::x86_avx512_vpdpbusds_512
;
3964 llvm_unreachable("Unexpected intrinsic");
3966 Value
*Args
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3967 CI
->getArgOperand(2) };
3968 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(), IID
),
3970 Value
*PassThru
= ZeroMask
? ConstantAggregateZero::get(CI
->getType())
3971 : CI
->getArgOperand(0);
3972 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
, PassThru
);
3973 } else if (IsX86
&& (Name
.startswith("avx512.mask.vpdpwssd.") ||
3974 Name
.startswith("avx512.maskz.vpdpwssd.") ||
3975 Name
.startswith("avx512.mask.vpdpwssds.") ||
3976 Name
.startswith("avx512.maskz.vpdpwssds."))) {
3977 bool ZeroMask
= Name
[11] == 'z';
3978 bool IsSaturating
= Name
[ZeroMask
? 21 : 20] == 's';
3979 unsigned VecWidth
= CI
->getType()->getPrimitiveSizeInBits();
3981 if (VecWidth
== 128 && !IsSaturating
)
3982 IID
= Intrinsic::x86_avx512_vpdpwssd_128
;
3983 else if (VecWidth
== 256 && !IsSaturating
)
3984 IID
= Intrinsic::x86_avx512_vpdpwssd_256
;
3985 else if (VecWidth
== 512 && !IsSaturating
)
3986 IID
= Intrinsic::x86_avx512_vpdpwssd_512
;
3987 else if (VecWidth
== 128 && IsSaturating
)
3988 IID
= Intrinsic::x86_avx512_vpdpwssds_128
;
3989 else if (VecWidth
== 256 && IsSaturating
)
3990 IID
= Intrinsic::x86_avx512_vpdpwssds_256
;
3991 else if (VecWidth
== 512 && IsSaturating
)
3992 IID
= Intrinsic::x86_avx512_vpdpwssds_512
;
3994 llvm_unreachable("Unexpected intrinsic");
3996 Value
*Args
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3997 CI
->getArgOperand(2) };
3998 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(), IID
),
4000 Value
*PassThru
= ZeroMask
? ConstantAggregateZero::get(CI
->getType())
4001 : CI
->getArgOperand(0);
4002 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
, PassThru
);
    } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
                         Name == "addcarry.u32" || Name == "addcarry.u64" ||
                         Name == "subborrow.u32" || Name == "subborrow.u64")) {
      Intrinsic::ID IID;
      if (Name[0] == 'a' && Name.back() == '2')
        IID = Intrinsic::x86_addcarry_32;
      else if (Name[0] == 'a' && Name.back() == '4')
        IID = Intrinsic::x86_addcarry_64;
      else if (Name[0] == 's' && Name.back() == '2')
        IID = Intrinsic::x86_subborrow_32;
      else if (Name[0] == 's' && Name.back() == '4')
        IID = Intrinsic::x86_subborrow_64;
      else
        llvm_unreachable("Unexpected intrinsic");

      // Make a call with 3 operands.
      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                        CI->getArgOperand(2)};
      Value *NewCall = Builder.CreateCall(
          Intrinsic::getDeclaration(CI->getModule(), IID), Args);

      // Extract the second result and store it.
      Value *Data = Builder.CreateExtractValue(NewCall, 1);
      // Cast the pointer to the right type.
      Value *Ptr = Builder.CreateBitCast(
          CI->getArgOperand(3), llvm::PointerType::getUnqual(Data->getType()));
      Builder.CreateAlignedStore(Data, Ptr, Align(1));
      // Replace the original call result with the first result of the new call.
      Value *CF = Builder.CreateExtractValue(NewCall, 0);
      CI->replaceAllUsesWith(CF);
      Rep = nullptr;
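      // For example, an addcarry.u32 call is rewritten (roughly) to
      //   %pair = call { i8, i32 } @llvm.x86.addcarry.32(i8 %c, i32 %a, i32 %b)
      // where element 1 (the i32 sum) is stored through the old pointer
      // argument and element 0 (the i8 carry-out) replaces the call's result.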
    } else if (IsX86 && Name.startswith("avx512.mask.") &&
               upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
      // Rep will be updated by the call in the condition.
    } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
      Value *Arg = CI->getArgOperand(0);
      Value *Neg = Builder.CreateNeg(Arg, "neg");
      Value *Cmp = Builder.CreateICmpSGE(
          Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
      Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
    } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
                          Name.startswith("atomic.load.add.f64.p"))) {
      Value *Ptr = CI->getArgOperand(0);
      Value *Val = CI->getArgOperand(1);
      Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
                                    AtomicOrdering::SequentiallyConsistent);
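      // For example, llvm.nvvm.atomic.load.add.f32.p0f32(ptr %p, float %v)
      // becomes roughly
      //   %old = atomicrmw fadd ptr %p, float %v seq_cst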
    } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
                          Name == "max.ui" || Name == "max.ull")) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
                       ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
                       : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
    } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
                          Name == "min.ui" || Name == "min.ull")) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
                       ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
                       : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
    } else if (IsNVVM && Name == "clz.ll") {
      // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
      Value *Arg = CI->getArgOperand(0);
      Value *Ctlz = Builder.CreateCall(
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                    {Arg->getType()}),
          {Arg, Builder.getFalse()}, "ctlz");
      Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
    } else if (IsNVVM && Name == "popc.ll") {
      // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
      Value *Arg = CI->getArgOperand(0);
      Value *Popc = Builder.CreateCall(
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                    {Arg->getType()}),
          Arg, "ctpop");
      Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
    } else if (IsNVVM) {
      if (Name == "h2f") {
        Rep =
            Builder.CreateCall(Intrinsic::getDeclaration(
                                   F->getParent(), Intrinsic::convert_from_fp16,
                                   {Builder.getFloatTy()}),
                               CI->getArgOperand(0), "h2f");
      } else {
        Intrinsic::ID IID = ShouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic &&
            !F->getReturnType()->getScalarType()->isBFloatTy()) {
          rename(F);
          NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
          SmallVector<Value *, 2> Args;
          for (size_t I = 0; I < NewFn->arg_size(); ++I) {
            Value *Arg = CI->getArgOperand(I);
            Type *OldType = Arg->getType();
            Type *NewType = NewFn->getArg(I)->getType();
            Args.push_back((OldType->isIntegerTy() &&
                            NewType->getScalarType()->isBFloatTy())
                               ? Builder.CreateBitCast(Arg, NewType)
                               : Arg);
          }
          Rep = Builder.CreateCall(NewFn, Args);
          if (F->getReturnType()->isIntegerTy())
            Rep = Builder.CreateBitCast(Rep, F->getReturnType());
        }
      }
    } else if (IsARM) {
      Rep = UpgradeARMIntrinsicCall(Name, CI, F, Builder);
    } else if (IsAMDGCN) {
      Rep = UpgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
    } else {
      llvm_unreachable("Unknown function for CallBase upgrade.");
    }
    if (Rep)
      CI->replaceAllUsesWith(Rep);
    CI->eraseFromParent();
    return;
  }
  const auto &DefaultCase = [&]() -> void {
    if (CI->getFunctionType() == NewFn->getFunctionType()) {
      // Handle generic mangling change.
      assert(
          (CI->getCalledFunction()->getName() != NewFn->getName()) &&
          "Unknown function for CallBase upgrade and isn't just a name change");
      CI->setCalledFunction(NewFn);
      return;
    }

    // This must be an upgrade from a named to a literal struct.
    if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
      assert(OldST != NewFn->getReturnType() &&
             "Return type must have changed");
      assert(OldST->getNumElements() ==
                 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
             "Must have same number of elements");

      SmallVector<Value *> Args(CI->args());
      Value *NewCI = Builder.CreateCall(NewFn, Args);
      Value *Res = PoisonValue::get(OldST);
      for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
        Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
        Res = Builder.CreateInsertValue(Res, Elem, Idx);
      }
      CI->replaceAllUsesWith(Res);
      CI->eraseFromParent();
      return;
    }

    // We're probably about to produce something invalid. Let the verifier
    // catch it instead of dying here.
    CI->setCalledOperand(
        ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
  };
  CallInst *NewCall = nullptr;
  switch (NewFn->getIntrinsicID()) {
  default: {
    DefaultCase();
    return;
  }
4168 case Intrinsic::arm_neon_vst1
:
4169 case Intrinsic::arm_neon_vst2
:
4170 case Intrinsic::arm_neon_vst3
:
4171 case Intrinsic::arm_neon_vst4
:
4172 case Intrinsic::arm_neon_vst2lane
:
4173 case Intrinsic::arm_neon_vst3lane
:
4174 case Intrinsic::arm_neon_vst4lane
: {
4175 SmallVector
<Value
*, 4> Args(CI
->args());
4176 NewCall
= Builder
.CreateCall(NewFn
, Args
);
4179 case Intrinsic::aarch64_sve_bfmlalb_lane_v2
:
4180 case Intrinsic::aarch64_sve_bfmlalt_lane_v2
:
4181 case Intrinsic::aarch64_sve_bfdot_lane_v2
: {
4182 LLVMContext
&Ctx
= F
->getParent()->getContext();
4183 SmallVector
<Value
*, 4> Args(CI
->args());
4184 Args
[3] = ConstantInt::get(Type::getInt32Ty(Ctx
),
4185 cast
<ConstantInt
>(Args
[3])->getZExtValue());
4186 NewCall
= Builder
.CreateCall(NewFn
, Args
);
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld2_sret: {
    StringRef Name = F->getName();
    Name = Name.substr(5);
    unsigned N = StringSwitch<unsigned>(Name)
                     .StartsWith("aarch64.sve.ld2", 2)
                     .StartsWith("aarch64.sve.ld3", 3)
                     .StartsWith("aarch64.sve.ld4", 4)
                     .Default(0);
    ScalableVectorType *RetTy =
        dyn_cast<ScalableVectorType>(F->getReturnType());
    unsigned MinElts = RetTy->getMinNumElements() / N;
    SmallVector<Value *, 2> Args(CI->args());
    Value *NewLdCall = Builder.CreateCall(NewFn, Args);
    Value *Ret = llvm::PoisonValue::get(RetTy);
    for (unsigned I = 0; I < N; I++) {
      Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
      Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
      Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
    }
    NewCall = dyn_cast<CallInst>(Ret);
    break;
  }
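
  // llvm.coro.end gained an extra token operand; upgrade old calls by
  // appending ConstantTokenNone.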
  case Intrinsic::coro_end: {
    SmallVector<Value *, 3> Args(CI->args());
    Args.push_back(ConstantTokenNone::get(CI->getContext()));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
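
  // aarch64.sve.tuple.get is mapped onto the generic llvm.vector.extract
  // intrinsic; the old tuple element index is rescaled into an element offset
  // within the wide vector.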
  case Intrinsic::vector_extract: {
    StringRef Name = F->getName();
    Name = Name.substr(5); // Strip llvm
    if (!Name.startswith("aarch64.sve.tuple.get")) {
      DefaultCase();
      return;
    }
    ScalableVectorType *RetTy =
        dyn_cast<ScalableVectorType>(F->getReturnType());
    unsigned MinElts = RetTy->getMinNumElements();
    unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
    break;
  }
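
  // Likewise, aarch64.sve.tuple.set and aarch64.sve.tuple.create* are mapped
  // onto llvm.vector.insert, again rescaling tuple indices into element
  // offsets within the wide result vector.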
  case Intrinsic::vector_insert: {
    StringRef Name = F->getName();
    Name = Name.substr(5);
    if (!Name.startswith("aarch64.sve.tuple")) {
      DefaultCase();
      return;
    }
    if (Name.startswith("aarch64.sve.tuple.set")) {
      unsigned I = dyn_cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      ScalableVectorType *Ty =
          dyn_cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
      Value *NewIdx =
          ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
      NewCall = Builder.CreateCall(
          NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
      break;
    }
    if (Name.startswith("aarch64.sve.tuple.create")) {
      unsigned N = StringSwitch<unsigned>(Name)
                       .StartsWith("aarch64.sve.tuple.create2", 2)
                       .StartsWith("aarch64.sve.tuple.create3", 3)
                       .StartsWith("aarch64.sve.tuple.create4", 4)
                       .Default(0);
      assert(N > 1 && "Create is expected to be between 2-4");
      ScalableVectorType *RetTy =
          dyn_cast<ScalableVectorType>(F->getReturnType());
      Value *Ret = llvm::PoisonValue::get(RetTy);
      unsigned MinElts = RetTy->getMinNumElements() / N;
      for (unsigned I = 0; I < N; I++) {
        Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
        Value *V = CI->getArgOperand(I);
        Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
      }
      NewCall = dyn_cast<CallInst>(Ret);
    }
    break;
  }
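
  // The NEON BF16 dot-product and matrix-multiply intrinsics now take bfloat
  // vectors; bitcast the second and third operands from their old element
  // type to the appropriately sized bfloat vector.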
  case Intrinsic::arm_neon_bfdot:
  case Intrinsic::arm_neon_bfmmla:
  case Intrinsic::arm_neon_bfmlalb:
  case Intrinsic::arm_neon_bfmlalt:
  case Intrinsic::aarch64_neon_bfdot:
  case Intrinsic::aarch64_neon_bfmmla:
  case Intrinsic::aarch64_neon_bfmlalb:
  case Intrinsic::aarch64_neon_bfmlalt: {
    SmallVector<Value *, 3> Args;
    assert(CI->arg_size() == 3 &&
           "Mismatch between function args and call args");
    size_t OperandWidth =
        CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
    assert((OperandWidth == 64 || OperandWidth == 128) &&
           "Unexpected operand width");
    Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
    auto Iter = CI->args().begin();
    Args.push_back(*Iter++);
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::bitreverse:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;
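
  // ctlz/cttz gained a second i1 operand; passing false keeps the old
  // behaviour of being fully defined when the input is zero.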
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    assert(CI->arg_size() == 1 &&
           "Mismatch between function args and call args");
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
    break;

  case Intrinsic::objectsize: {
    Value *NullIsUnknownSize =
        CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
    Value *Dynamic =
        CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
    NewCall = Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize,
                Dynamic});
    break;
  }

  case Intrinsic::ctpop:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::convert_from_fp16:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::dbg_value: {
    StringRef Name = F->getName();
    Name = Name.substr(5); // Strip llvm.
    // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
    if (Name.startswith("dbg.addr")) {
      DIExpression *Expr = cast<DIExpression>(
          cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
      Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
      NewCall =
          Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
                                     MetadataAsValue::get(C, Expr)});
      break;
    }

    // Upgrade from the old version that had an extra offset argument.
    assert(CI->arg_size() == 4);
    // Drop nonzero offsets instead of attempting to upgrade them.
    if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
      if (Offset->isZeroValue()) {
        NewCall = Builder.CreateCall(
            NewFn,
            {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
        break;
      }
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::ptr_annotation:
    // Upgrade from versions that lacked the annotation attribute argument.
    if (CI->arg_size() != 4) {
      DefaultCase();
      return;
    }

    // Create a new call with an added null annotation attribute argument.
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(2), CI->getArgOperand(3),
                                   Constant::getNullValue(Builder.getPtrTy())});
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;

  case Intrinsic::var_annotation:
    // Upgrade from versions that lacked the annotation attribute argument.
    if (CI->arg_size() != 4) {
      DefaultCase();
      return;
    }
    // Create a new call with an added null annotation attribute argument.
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(2), CI->getArgOperand(3),
                                   Constant::getNullValue(Builder.getPtrTy())});
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
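
  // On RV64 the old forms of these RISC-V scalar crypto intrinsics operated
  // on i64 values; the new forms are i32-only, so truncate the inputs and
  // sign-extend the result back to the original type.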
  case Intrinsic::riscv_aes32dsi:
  case Intrinsic::riscv_aes32dsmi:
  case Intrinsic::riscv_aes32esi:
  case Intrinsic::riscv_aes32esmi:
  case Intrinsic::riscv_sm4ks:
  case Intrinsic::riscv_sm4ed: {
    // The last argument to these intrinsics used to be i8 and changed to i32.
    // The type overload for sm4ks and sm4ed was removed.
    Value *Arg2 = CI->getArgOperand(2);
    if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
      return;

    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    if (CI->getType()->isIntegerTy(64)) {
      Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
      Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
    }

    Arg2 = ConstantInt::get(Type::getInt32Ty(C),
                            cast<ConstantInt>(Arg2)->getZExtValue());

    NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
    Value *Res = NewCall;
    if (Res->getType() != CI->getType())
      Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::riscv_sha256sig0:
  case Intrinsic::riscv_sha256sig1:
  case Intrinsic::riscv_sha256sum0:
  case Intrinsic::riscv_sha256sum1:
  case Intrinsic::riscv_sm3p0:
  case Intrinsic::riscv_sm3p1: {
    // These unary intrinsics used to be overloaded on the operand type; the
    // new forms operate on i32 only, so on RV64 truncate the operand and
    // sign-extend the result back.
    if (!CI->getType()->isIntegerTy(64))
      return;

    Value *Arg =
        Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());

    NewCall = Builder.CreateCall(NewFn, Arg);
    Value *Res =
        Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }
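
  // The XOP scalar vfrcz intrinsics dropped their first operand; only the
  // second operand of the old call is forwarded to the new single-argument
  // form.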
  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
    break;

  case Intrinsic::x86_xop_vpermil2pd:
  case Intrinsic::x86_xop_vpermil2ps:
  case Intrinsic::x86_xop_vpermil2pd_256:
  case Intrinsic::x86_xop_vpermil2ps_256: {
    SmallVector<Value *, 4> Args(CI->args());
    VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
    VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
      return;

    // Old intrinsic, add bitcasts
    Value *Arg1 = CI->getArgOperand(1);

    auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);

    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

    NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
    break;
  }

  case Intrinsic::x86_rdtscp: {
    // This used to take one argument. If we have no arguments, it is already
    // upgraded.
    if (CI->getNumOperands() == 0)
      return;

    NewCall = Builder.CreateCall(NewFn);
    // Extract the second result and store it.
    Value *Data = Builder.CreateExtractValue(NewCall, 1);
    // Cast the pointer to the right type.
    Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
                                 llvm::PointerType::getUnqual(Data->getType()));
    Builder.CreateAlignedStore(Data, Ptr, Align(1));
    // Replace the original call result with the first result of the new call.
    Value *TSC = Builder.CreateExtractValue(NewCall, 0);

    NewCall->takeName(CI);
    CI->replaceAllUsesWith(TSC);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument models
    // an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->args());

    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
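
  // The AVX-512 masked FP compares now take their mask operand as an i1
  // vector and return a mask vector as well; rebuild the mask operand and
  // convert the result back to the old integer mask form.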
  case Intrinsic::x86_avx512_mask_cmp_pd_128:
  case Intrinsic::x86_avx512_mask_cmp_pd_256:
  case Intrinsic::x86_avx512_mask_cmp_pd_512:
  case Intrinsic::x86_avx512_mask_cmp_ps_128:
  case Intrinsic::x86_avx512_mask_cmp_ps_256:
  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts =
        cast<FixedVectorType>(Args[0]->getType())->getNumElements();
    Args[3] = getX86MaskVec(Builder, Args[3], NumElts);

    NewCall = Builder.CreateCall(NewFn, Args);
    Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr);

    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }
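
  // The AVX-512 BF16 intrinsics now use bfloat vector types where the old
  // declarations used i16 vectors; bitcast the affected operands and the
  // result between the two layouts.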
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
  case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
  case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
  case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    if (NewFn->getIntrinsicID() ==
        Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
      Args[1] = Builder.CreateBitCast(
          Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));

    NewCall = Builder.CreateCall(NewFn, Args);
    Value *Res = Builder.CreateBitCast(
        NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));

    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_avx512bf16_dpbf16ps_128:
  case Intrinsic::x86_avx512bf16_dpbf16ps_256:
  case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts =
        cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
    Args[1] = Builder.CreateBitCast(
        Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
    Args[2] = Builder.CreateBitCast(
        Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));

    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::thread_pointer: {
    NewCall = Builder.CreateCall(NewFn, {});
    break;
  }

  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset: {
    // We have to make sure that the call signature is what we're expecting.
    // We only want to change the old signatures by removing the alignment arg:
    //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
    //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
    //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
    //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
    // Note: i8*'s in the above can be any pointer type
    if (CI->arg_size() != 5) {
      DefaultCase();
      return;
    }
    // Remove alignment argument (3), and add alignment attributes to the
    // dest/src pointers.
    Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
                      CI->getArgOperand(2), CI->getArgOperand(4)};
    NewCall = Builder.CreateCall(NewFn, Args);
    AttributeList OldAttrs = CI->getAttributes();
    AttributeList NewAttrs = AttributeList::get(
        C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
        {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
         OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
    NewCall->setAttributes(NewAttrs);
    auto *MemCI = cast<MemIntrinsic>(NewCall);
    // All mem intrinsics support dest alignment.
    const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
    MemCI->setDestAlignment(Align->getMaybeAlignValue());
    // Memcpy/Memmove also support source alignment.
    if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
      MTI->setSourceAlignment(Align->getMaybeAlignValue());
    break;
  }
  }
  assert(NewCall && "Should have either set this variable or returned through "
                    "the default case");
  NewCall->takeName(CI);
  CI->replaceAllUsesWith(NewCall);
  CI->eraseFromParent();
}

void llvm::UpgradeCallsToIntrinsic(Function *F) {
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");

  // Check if this function should be upgraded and get the replacement function
  // if there is one.
  Function *NewFn;
  if (UpgradeIntrinsicFunction(F, NewFn)) {
    // Replace all users of the old function with the new function or new
    // instructions. This is not a range loop because the call is deleted.
    for (User *U : make_early_inc_range(F->users()))
      if (CallBase *CB = dyn_cast<CallBase>(U))
        UpgradeIntrinsicCall(CB, NewFn);

    // Remove old function, no longer used, from the module.
    F->eraseFromParent();
  }
}
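
// For example, an old scalar TBAA tag such as !1 = !{!"int", !0} is wrapped
// below into the struct-path form !{!1, !1, i64 0}, reusing the old node as
// both the base type and the access type with offset zero.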
MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
  const unsigned NumOperands = MD.getNumOperands();
  if (NumOperands == 0)
    return &MD; // Invalid, punt to a verifier error.

  // Check if the tag uses struct-path aware TBAA format.
  if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
    return &MD;

  auto &Context = MD.getContext();
  if (NumOperands == 3) {
    Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
    MDNode *ScalarType = MDNode::get(Context, Elts);
    // Create a MDNode <ScalarType, ScalarType, offset 0, const>
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(
                             Constant::getNullValue(Type::getInt64Ty(Context))),
                         MD.getOperand(2)};
    return MDNode::get(Context, Elts2);
  }
  // Create a MDNode <MD, MD, offset 0>
  Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
                                    Type::getInt64Ty(Context)))};
  return MDNode::get(Context, Elts);
}

Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
                                      Instruction *&Temp) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Temp = nullptr;
  Type *SrcTy = V->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = V->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);
    Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);

    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
  }

  return nullptr;
}

Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Type *SrcTy = C->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = C->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);
    return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
                                     DestTy);
  }

  return nullptr;
}

/// Check the debug info version number; if it is outdated, drop the debug
/// info. Return true if the module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  if (DisableAutoUpgradeDebugInfo)
    return false;

  unsigned Version = getDebugMetadataVersionFromModule(M);
  if (Version == DEBUG_METADATA_VERSION) {
    bool BrokenDebugInfo = false;
    if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
      report_fatal_error("Broken module found, compilation aborted!");
    if (!BrokenDebugInfo)
      // Everything is ok.
      return false;
    else {
      // Diagnose malformed debug info.
      DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
      M.getContext().diagnose(Diag);
    }
  }
  bool Modified = StripDebugInfo(M);
  if (Modified && Version != DEBUG_METADATA_VERSION) {
    // Diagnose a version mismatch.
    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DiagVersion);
  }
  return Modified;
}

/// Check for the ObjC retain/release marker metadata, upgrading it to the
/// module-flag form if needed. Returns true if the module is modified.
static bool UpgradeRetainReleaseMarker(Module &M) {
  bool Changed = false;
  const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
  NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
  if (ModRetainReleaseMarker) {
    MDNode *Op = ModRetainReleaseMarker->getOperand(0);
    if (Op) {
      MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
      if (ID) {
        SmallVector<StringRef, 4> ValueComp;
        ID->getString().split(ValueComp, "#");
        if (ValueComp.size() == 2) {
          std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
          ID = MDString::get(M.getContext(), NewValue);
        }
        M.addModuleFlag(Module::Error, MarkerKey, ID);
        M.eraseNamedMetadata(ModRetainReleaseMarker);
        Changed = true;
      }
    }
  }
  return Changed;
}
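
// Once the marker has been upgraded, direct calls to the ObjC runtime, e.g.
//   %0 = call i8* @objc_retain(i8* %x)
// are rewritten below to the corresponding intrinsics, e.g.
//   %0 = call i8* @llvm.objc.retain(i8* %x)
// bitcasting operands and results where the prototypes differ.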
void llvm::UpgradeARCRuntime(Module &M) {
  // This lambda converts normal function calls to ARC runtime functions to
  // intrinsic calls.
  auto UpgradeToIntrinsic = [&](const char *OldFunc,
                                llvm::Intrinsic::ID IntrinsicFunc) {
    Function *Fn = M.getFunction(OldFunc);

    if (!Fn)
      return;

    Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);

    for (User *U : make_early_inc_range(Fn->users())) {
      CallInst *CI = dyn_cast<CallInst>(U);
      if (!CI || CI->getCalledFunction() != Fn)
        continue;

      IRBuilder<> Builder(CI->getParent(), CI->getIterator());
      FunctionType *NewFuncTy = NewFn->getFunctionType();
      SmallVector<Value *, 2> Args;

      // Don't upgrade the intrinsic if it's not valid to bitcast the return
      // value to the return type of the old function.
      if (NewFuncTy->getReturnType() != CI->getType() &&
          !CastInst::castIsValid(Instruction::BitCast, CI,
                                 NewFuncTy->getReturnType()))
        continue;

      bool InvalidCast = false;

      for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
        Value *Arg = CI->getArgOperand(I);

        // Bitcast argument to the parameter type of the new function if it's
        // not a variadic argument.
        if (I < NewFuncTy->getNumParams()) {
          // Don't upgrade the intrinsic if it's not valid to bitcast the
          // argument to the parameter type of the new function.
          if (!CastInst::castIsValid(Instruction::BitCast, Arg,
                                     NewFuncTy->getParamType(I))) {
            InvalidCast = true;
            break;
          }
          Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
        }
        Args.push_back(Arg);
      }

      if (InvalidCast)
        continue;

      // Create a call instruction that calls the new function.
      CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
      NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
      NewCall->takeName(CI);

      // Bitcast the return value back to the type of the old call.
      Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());

      if (!CI->use_empty())
        CI->replaceAllUsesWith(NewRetVal);
      CI->eraseFromParent();
    }

    if (Fn->use_empty())
      Fn->eraseFromParent();
  };

  // Unconditionally convert a call to "clang.arc.use" to a call to
  // "llvm.objc.clang.arc.use".
  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);

  // Upgrade the retain/release marker. If there is no need to upgrade the
  // marker, the module is either already new enough to contain the new
  // intrinsics or it is not ARC, and the runtime calls don't need upgrading
  // either.
  if (!UpgradeRetainReleaseMarker(M))
    return;

  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
      {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
      {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
      {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
      {"objc_autoreleaseReturnValue",
       llvm::Intrinsic::objc_autoreleaseReturnValue},
      {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
      {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
      {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
      {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
      {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
      {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
      {"objc_release", llvm::Intrinsic::objc_release},
      {"objc_retain", llvm::Intrinsic::objc_retain},
      {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
      {"objc_retainAutoreleaseReturnValue",
       llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
      {"objc_retainAutoreleasedReturnValue",
       llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
      {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
      {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
      {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
      {"objc_unsafeClaimAutoreleasedReturnValue",
       llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
      {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
      {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
      {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
      {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
      {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
      {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
      {"objc_arc_annotation_topdown_bbstart",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
      {"objc_arc_annotation_topdown_bbend",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
      {"objc_arc_annotation_bottomup_bbstart",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
      {"objc_arc_annotation_bottomup_bbend",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};

  for (auto &I : RuntimeFuncs)
    UpgradeToIntrinsic(I.first, I.second);
}

bool llvm::UpgradeModuleFlags(Module &M) {
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  bool HasSwiftVersionFlag = false;
  uint8_t SwiftMajorVersion, SwiftMinorVersion;
  uint32_t SwiftABIVersion;
  auto Int8Ty = Type::getInt8Ty(M.getContext());
  auto Int32Ty = Type::getInt32Ty(M.getContext());

  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(I);
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    auto SetBehavior = [&](Module::ModFlagBehavior B) {
      Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
                              Type::getInt32Ty(M.getContext()), B)),
                          MDString::get(M.getContext(), ID->getString()),
                          Op->getOperand(2)};
      ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
      Changed = true;
    };

    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
    // Upgrade PIC from Error/Max to Min.
    if (ID->getString() == "PIC Level") {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
        uint64_t V = Behavior->getLimitedValue();
        if (V == Module::Error || V == Module::Max)
          SetBehavior(Module::Min);
      }
    }
    // Upgrade "PIE Level" from Error to Max.
    if (ID->getString() == "PIE Level")
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
        if (Behavior->getLimitedValue() == Module::Error)
          SetBehavior(Module::Max);

    // Upgrade branch protection and return address signing module flags. The
    // module flag behavior for these fields was Error and is now Min.
    if (ID->getString() == "branch-target-enforcement" ||
        ID->getString().startswith("sign-return-address")) {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
        if (Behavior->getLimitedValue() == Module::Error) {
          Type *Int32Ty = Type::getInt32Ty(M.getContext());
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
              Op->getOperand(1), Op->getOperand(2)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }

    // Upgrade the Objective-C Image Info Section. Remove the whitespace in the
    // section name so that llvm-lto will not complain about mismatched module
    // flags that are functionally the same.
    if (ID->getString() == "Objective-C Image Info Section") {
      if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
        SmallVector<StringRef, 4> ValueComp;
        Value->getString().split(ValueComp, " ");
        if (ValueComp.size() != 1) {
          std::string NewValue;
          for (auto &S : ValueComp)
            NewValue += S.str();
          Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
                              MDString::get(M.getContext(), NewValue)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }

    // The IR upgrader turns an i32-typed "Objective-C Garbage Collection"
    // module flag into an i8 value. If the higher bits are set, it also adds
    // new module flags for the Swift info.
    if (ID->getString() == "Objective-C Garbage Collection") {
      auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
      if (Md) {
        assert(Md->getValue() && "Expected non-empty metadata");
        auto Type = Md->getValue()->getType();
        if (Type == Int8Ty)
          continue;
        unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
        if ((Val & 0xff) != Val) {
          HasSwiftVersionFlag = true;
          SwiftABIVersion = (Val & 0xff00) >> 8;
          SwiftMajorVersion = (Val & 0xff000000) >> 24;
          SwiftMinorVersion = (Val & 0xff0000) >> 16;
        }
        Metadata *Ops[3] = {
            ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Error)),
            Op->getOperand(1),
            ConstantAsMetadata::get(ConstantInt::get(Int8Ty, Val & 0xff))};
        ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
        Changed = true;
      }
    }
  }

  // "Objective-C Class Properties" is recently added for Objective-C. We
  // upgrade ObjC bitcodes to contain an "Objective-C Class Properties" module
  // flag of value 0, so we can correctly downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
                    (uint32_t)0);
    Changed = true;
  }

  if (HasSwiftVersionFlag) {
    M.addModuleFlag(Module::Error, "Swift ABI Version",
                    SwiftABIVersion);
    M.addModuleFlag(Module::Error, "Swift Major Version",
                    ConstantInt::get(Int8Ty, SwiftMajorVersion));
    M.addModuleFlag(Module::Error, "Swift Minor Version",
                    ConstantInt::get(Int8Ty, SwiftMinorVersion));
    Changed = true;
  }

  return Changed;
}

void llvm::UpgradeSectionAttributes(Module &M) {
  auto TrimSpaces = [](StringRef Section) -> std::string {
    SmallVector<StringRef, 5> Components;
    Section.split(Components, ',');

    SmallString<32> Buffer;
    raw_svector_ostream OS(Buffer);

    for (auto Component : Components)
      OS << ',' << Component.trim();

    return std::string(OS.str().substr(1));
  };

  for (auto &GV : M.globals()) {
    if (!GV.hasSection())
      continue;

    StringRef Section = GV.getSection();

    if (!Section.startswith("__DATA, __objc_catlist"))
      continue;

    // __DATA, __objc_catlist, regular, no_dead_strip
    // -> __DATA,__objc_catlist,regular,no_dead_strip
    GV.setSection(TrimSpaces(Section));
  }
}

// Prior to LLVM 10.0, the strictfp attribute could be used on individual
// callsites within a function that did not also have the strictfp attribute.
// Since 10.0, if strict FP semantics are needed within a function, the
// function must have the strictfp attribute and all calls within the function
// must also have the strictfp attribute. This latter restriction is
// necessary to prevent unwanted libcall simplification when a function is
// being cloned (such as for inlining).
//
// The "dangling" strictfp attribute usage was only used to prevent constant
// folding and other libcall simplification. The nobuiltin attribute on the
// callsite has the same effect.
struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
  StrictFPUpgradeVisitor() = default;

  void visitCallBase(CallBase &Call) {
    if (!Call.isStrictFP())
      return;
    if (isa<ConstrainedFPIntrinsic>(&Call))
      return;
    // If we get here, the caller doesn't have the strictfp attribute
    // but this callsite does. Replace the strictfp attribute with nobuiltin.
    Call.removeFnAttr(Attribute::StrictFP);
    Call.addFnAttr(Attribute::NoBuiltin);
  }
};

void llvm::UpgradeFunctionAttributes(Function &F) {
  // If a function definition doesn't have the strictfp attribute,
  // convert any callsite strictfp attributes to nobuiltin.
  if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
    StrictFPUpgradeVisitor SFPV;
    SFPV.visit(F);
  }

  // Remove all incompatible attributes from function.
  F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
  for (auto &Arg : F.args())
    Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
}

static bool isOldLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return false;
  if (T->getNumOperands() < 1)
    return false;
  auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!S)
    return false;
  return S->getString().startswith("llvm.vectorizer.");
}
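
// Maps the legacy "llvm.vectorizer.*" loop hints to their "llvm.loop.*"
// replacements, e.g. "llvm.vectorizer.width" becomes
// "llvm.loop.vectorize.width", while "llvm.vectorizer.unroll" becomes
// "llvm.loop.interleave.count".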
static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
  StringRef OldPrefix = "llvm.vectorizer.";
  assert(OldTag.startswith(OldPrefix) && "Expected old prefix");

  if (OldTag == "llvm.vectorizer.unroll")
    return MDString::get(C, "llvm.loop.interleave.count");

  return MDString::get(
      C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
             .str());
}

static Metadata *upgradeLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return MD;
  if (T->getNumOperands() < 1)
    return MD;
  auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!OldTag)
    return MD;
  if (!OldTag->getString().startswith("llvm.vectorizer."))
    return MD;

  // This has an old tag. Upgrade it.
  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
  for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
    Ops.push_back(T->getOperand(I));

  return MDTuple::get(T->getContext(), Ops);
}

MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
  auto *T = dyn_cast<MDTuple>(&N);
  if (!T)
    return &N;

  if (none_of(T->operands(), isOldLoopArgument))
    return &N;

  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  for (Metadata *MD : T->operands())
    Ops.push_back(upgradeLoopArgument(MD));

  return MDTuple::get(T->getContext(), Ops);
}

std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
  Triple T(TT);
  // The only data layout upgrades needed for pre-GCN are setting the address
  // space of globals to 1.
  if (T.isAMDGPU() && !T.isAMDGCN() && !DL.contains("-G") &&
      !DL.startswith("G")) {
    return DL.empty() ? std::string("G1") : (DL + "-G1").str();
  }

  if (T.isRISCV64()) {
    // Make i32 a native type for 64-bit RISC-V.
    auto I = DL.find("-n64-");
    if (I != StringRef::npos)
      return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
    return DL.str();
  }

  std::string Res = DL.str();
  // AMDGCN data layout upgrades.
  if (T.isAMDGCN()) {
    // Define address spaces for constants.
    if (!DL.contains("-G") && !DL.starts_with("G"))
      Res.append(Res.empty() ? "G1" : "-G1");

    // Add missing non-integral declarations.
    // This goes before adding new address spaces to prevent incoherent string
    // values.
    if (!DL.contains("-ni") && !DL.startswith("ni"))
      Res.append("-ni:7:8");
    // Update ni:7 to ni:7:8.
    if (DL.ends_with("ni:7"))
      Res.append(":8");

    // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
    // resources). An empty data layout has already been upgraded to G1 by now.
    if (!DL.contains("-p7") && !DL.startswith("p7"))
      Res.append("-p7:160:256:256:32");
    if (!DL.contains("-p8") && !DL.startswith("p8"))
      Res.append("-p8:128:128");

    return Res;
  }

  if (!T.isX86())
    return Res;
  // If the datalayout matches the expected format, add pointer size address
  // spaces to the datalayout.
  std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
  if (StringRef Ref = Res; !Ref.contains(AddrSpaces)) {
    SmallVector<StringRef, 4> Groups;
    Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
    if (R.match(Res, &Groups))
      Res = (Groups[1] + AddrSpaces + Groups[3]).str();
  }

  // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
  // for i128 operations prior to this being reflected in the data layout, and
  // clang mostly produced LLVM IR that already aligned i128 to 16 byte
  // boundaries, so although this is a breaking change, the upgrade is expected
  // to fix more IR than it breaks.
  // Intel MCU is an exception and uses 4-byte-alignment.
  if (!T.isOSIAMCU()) {
    std::string I128 = "-i128:128";
    if (StringRef Ref = Res; !Ref.contains(I128)) {
      SmallVector<StringRef, 4> Groups;
      Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
      if (R.match(Res, &Groups))
        Res = (Groups[1] + I128 + Groups[3]).str();
    }
  }

  // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
  // Raising the alignment is safe because Clang did not produce f80 values in
  // the MSVC environment before this upgrade was added.
  if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
    StringRef Ref = Res;
    auto I = Ref.find("-f80:32-");
    if (I != StringRef::npos)
      Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
  }

  return Res;
}
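
// Upgrade legacy string function attributes to their modern forms, roughly:
//   "no-frame-pointer-elim"="true"   -> "frame-pointer"="all"
//   "no-frame-pointer-elim"="false"  -> "frame-pointer"="none"
//   "no-frame-pointer-elim-non-leaf" -> "frame-pointer"="non-leaf"
//   "null-pointer-is-valid"="true"   -> null_pointer_is_valid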
void llvm::UpgradeAttributes(AttrBuilder &B) {
  StringRef FramePointer;
  Attribute A = B.getAttribute("no-frame-pointer-elim");
  if (A.isValid()) {
    // The value can be "true" or "false".
    FramePointer = A.getValueAsString() == "true" ? "all" : "none";
    B.removeAttribute("no-frame-pointer-elim");
  }
  if (B.contains("no-frame-pointer-elim-non-leaf")) {
    // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
    if (FramePointer != "all")
      FramePointer = "non-leaf";
    B.removeAttribute("no-frame-pointer-elim-non-leaf");
  }
  if (!FramePointer.empty())
    B.addAttribute("frame-pointer", FramePointer);

  A = B.getAttribute("null-pointer-is-valid");
  if (A.isValid()) {
    // The value can be "true" or "false".
    bool NullPointerIsValid = A.getValueAsString() == "true";
    B.removeAttribute("null-pointer-is-valid");
    if (NullPointerIsValid)
      B.addAttribute(Attribute::NullPointerIsValid);
  }
}

void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
  // clang.arc.attachedcall bundles are now required to have an operand.
  // If they don't, it's okay to drop them entirely: when there is an operand,
  // the "attachedcall" is meaningful and required, but without an operand,
  // it's just a marker NOP. Dropping it merely prevents an optimization.
  erase_if(Bundles, [&](OperandBundleDef &OBD) {
    return OBD.getTag() == "clang.arc.attachedcall" &&
           OBD.inputs().empty();