//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include "llvm/TargetParser/Triple.h"
#include <cstring>
#include <numeric>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
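
// For example, a matched declaration like "llvm.x86.sse41.ptestc" is renamed
// to "llvm.x86.sse41.ptestc.old" before the replacement declaration is
// created; calls to the ".old" function are then rewritten against the new
// intrinsic by the upgrade code below.
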
// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
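
// An illustrative sketch of the signature change handled above (argument types
// taken from the comment; the i32 result is assumed for the ptest family):
//   old: declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
//   new: declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>)
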
// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
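
// An illustrative sketch using sse41.insertps as a hypothetical example: only
// the trailing immediate operand changes type, e.g.
//   old: call ... @llvm.x86.sse41.insertps(..., i32 %imm)
//   new: call ... @llvm.x86.sse41.insertps(..., i8 %imm)
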
// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
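
// A minimal sketch of the return-type change, assuming the 128-bit pd
// variant: the old declaration returned a scalar integer mask (e.g. i8),
// while the upgraded intrinsic returns a vXi1 vector mask (e.g. <2 x i1>).
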
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsics matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // Added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.mask.cmp.")) {
    // Added in 7.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
             .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
             .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
             .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
             .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
             .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MaskedFPCompare(F, ID, NewFn);
    return false; // No other 'x86.avx512.mask.cmp.*'.
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::thread_pointer);
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("v2f32.v8i8", "v4f32.v16i8",
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to Aarch64 Neon or Arm Neon.
  }
  // Continue on to Arm or Aarch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
               Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
      if (Name.starts_with("bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }
      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("abs."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_abs_bf16)
        .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec")) {
          // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
          // there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
          Name.consume_front("flat.atomic.")) {
        if (Name.starts_with("fadd") ||
            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
          // declaration.
          NewFn = nullptr;
          return true;
        }
      }

      if (Name.starts_with("ldexp.")) {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords &&
          F->getParent()->IsNewDbgInfoFormat) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              // Skip over extract.last.active, otherwise it will be 'upgraded'
              // to a regular vector extract which is a different operation.
              .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("splice.", Intrinsic::vector_splice)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                    {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
    if (Name.consume_front("experimental.stepvector.")) {
      Intrinsic::ID ID = Intrinsic::stepvector;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), ID, F->getFunctionType()->getReturnType());
      return true;
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'm': {
    // Update the memory intrinsics (memcpy/memmove/memset) that had an
    // alignment parameter so that the alignment is instead embedded as an
    // attribute on the pointer args.
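    //
    // Illustrative sketch (types abbreviated; the alignment value A is
    // hypothetical):
    //   old: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n,
    //                                             i32 A, i1 false)
    //   new: call void @llvm.memcpy.p0.p0.i64(ptr align A %d, ptr align A %s,
    //                                         i64 %n, i1 false)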
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn =
            Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::memset, ParamTypes);
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.consume_front("nvvm.")) {
      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
      if (F->arg_size() == 1) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases("brev32", "brev64", Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                    {F->getReturnType()});
          return true;
        }
      }

      // Check for nvvm intrinsics that need a return type adjustment.
      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = nullptr;
          return true;
        }
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.

      // TODO: We could add lohi.i2d.
      bool Expand = false;
      if (Name.consume_front("abs."))
        // nvvm.abs.{i,ll}
        Expand = Name == "i" || Name == "ll";
      else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
        Expand = true;
      else if (Name.consume_front("max.") || Name.consume_front("min."))
        // nvvm.{min,max}.{s,i,ll,us,ui,ull}
        Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                 Name == "ui" || Name == "ull";
      else if (Name.consume_front("atomic.load.add."))
        // nvvm.atomic.load.add.{f32.p,f64.p}
        Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
      else if (Name.consume_front("bitcast."))
        // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
        Expand =
            Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
      else if (Name.consume_front("rotate."))
        // nvvm.rotate.{b32,b64,right.b64}
        Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
      else if (Name.consume_front("ptr.gen.to."))
        // nvvm.ptr.gen.to.{local,shared,global,constant}
        Expand = Name.starts_with("local") || Name.starts_with("shared") ||
                 Name.starts_with("global") || Name.starts_with("constant");
      else if (Name.consume_front("ptr."))
        // nvvm.ptr.{local,shared,global,constant}.to.gen
        Expand =
            (Name.consume_front("local") || Name.consume_front("shared") ||
             Name.consume_front("global") || Name.consume_front("constant")) &&
            Name.starts_with(".to.gen");
      else if (Name.consume_front("ldg.global."))
        // nvvm.ldg.global.{i,p,f}
        Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
                  Name.starts_with("p."));
      else
        Expand = false;

      if (Expand) {
        NewFn = nullptr;
        return true;
      }
      break; // No other 'nvvm.*'.
    }
    break;
  }
  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (Name.starts_with("objectsize.")) {
      Type *Tys[2] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->arg_size() == 2 || F->arg_size() == 3 ||
          F->getName() !=
              Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;

  case 'p':
    if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::ptr_annotation,
          {F->arg_begin()->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;

  case 'r': {
    if (Name.consume_front("riscv.")) {
      Intrinsic::ID ID;
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
               .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
               .Case("aes32esi", Intrinsic::riscv_aes32esi)
               .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
               .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
            F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
               .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
               .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
               .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
               .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
               .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }
      break; // No other 'riscv.*' intrinsics
    }
  } break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'v': {
    if (Name == "var.annotation" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::var_annotation,
          {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
      return true;
    }
    break;
  }

  case 'w':
    if (Name.consume_front("wasm.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
              .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
              .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->getReturnType());
        return true;
      }

      if (Name.consume_front("dot.i8x16.i7x16.")) {
        ID = StringSwitch<Intrinsic::ID>(Name)
                 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
                 .Case("add.signed",
                       Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
                 .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other 'wasm.dot.i8x16.i7x16.*'.
      }
      break; // No other 'wasm.*'.
    }
    break;

  case 'x':
    if (upgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }

  auto *ST = dyn_cast<StructType>(F->getReturnType());
  if (ST && (!ST->isLiteral() || ST->isPacked()) &&
      F->getIntrinsicID() != Intrinsic::not_intrinsic) {
    // Replace return type with literal non-packed struct. Only do this for
    // intrinsics declared to return a struct, not for intrinsics with
    // overloaded return type, in which case the exact struct type will be
    // mangled into the name.
    SmallVector<Intrinsic::IITDescriptor> Desc;
    Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
    if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
      auto *FT = F->getFunctionType();
      auto *NewST = StructType::get(ST->getContext(), ST->elements());
      auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
      std::string Name = F->getName().str();
      rename(F);
      NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
                               Name, F->getParent());

      // The new function may also need remangling.
      if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
        NewFn = *Result;
      return true;
    }
  }

  // Remangle our intrinsic since we upgrade the mangling
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != std::nullopt) {
    NewFn = *Result;
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
                                    bool CanUpgradeDebugIntrinsicsToRecords) {
  NewFn = nullptr;
  bool Upgraded =
      upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}
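// The llvm.global_ctors / llvm.global_dtors element type grew a third,
// associated-data pointer field; old two-field entries are rebuilt with a
// null third member. Illustrative example: { i32 65535, ptr @ctor } becomes
// { i32 65535, ptr @ctor, ptr null }.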
GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
                          GV->getName() == "llvm.global_dtors")) ||
      !GV->hasInitializer())
    return nullptr;
  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
  if (!ATy)
    return nullptr;
  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
  if (!STy || STy->getNumElements() != 2)
    return nullptr;

  LLVMContext &C = GV->getContext();
  IRBuilder<> IRB(C);
  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
                               IRB.getPtrTy());
  Constant *Init = GV->getInitializer();
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
    auto Ctor = cast<Constant>(Init->getOperand(i));
    NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
                                      Ctor->getAggregateElement(1),
                                      Constant::getNullValue(IRB.getPtrTy()));
  }
  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);

  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
                            NewInit, GV->getName());
}
// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
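// Illustrative example: on a 128-bit vector with Shift == 4 the shuffle below
// produces <0, 0, 0, 0, Op[0], ..., Op[11]>, i.e. every byte moves up four
// positions within its 16-byte lane and zeroes shift in at the bottom.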
static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
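// Illustrative example: on a 128-bit vector with Shift == 4 the result is
// <Op[4], ..., Op[15], 0, 0, 0, 0>, the byte-wise right-shift mirror of the
// PSLLDQ upgrade above.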
static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
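// Convert the integer mask operand of the old masked intrinsics into a vector
// of i1. Illustrative example: an i8 mask used with NumElts == 4 is bitcast
// to <8 x i1> and then shuffled down to a <4 x i1> holding its low four bits.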
static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
  llvm::VectorType *MaskTy = FixedVectorType::get(
      Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have fewer than 8 elements (1, 2 or 4), then the starting mask was
  // an i8 and we need to extract down to the right number of elements.
  if (NumElts <= 4) {
    int Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
                                       "extract");
  }

  return Mask;
}
static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
                            Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getX86MaskVec(Builder, Mask,
                       cast<FixedVectorType>(Op0->getType())->getNumElements());
  return Builder.CreateSelect(Mask, Op0, Op1);
}

static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
                                  Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
                                      Mask->getType()->getIntegerBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);
  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
  return Builder.CreateSelect(Mask, Op0, Op1);
}
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
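// Illustrative example: a 128-bit PALIGNR with ShiftVal == 4 selects
// <Op1[4..15], Op0[0..3]>, matching the instruction's concatenate-then-shift
// byte semantics.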
static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  int Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16;      // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(
      Op1, Op0, ArrayRef(Indices, NumElts), "palignr");

  return emitX86Select(Builder, Mask, Align, Passthru);
}
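// Upgrade the legacy VPERMT2/VPERMI2 intrinsics to the unified vpermi2var
// intrinsics, which take the index vector as their middle operand; for the
// non-index (VPERMT2) form the first two operands are swapped below to match.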
static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *Ty = CI.getType();
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  else
    llvm_unreachable("Unexpected intrinsic");

  Value *Args[] = {CI.getArgOperand(0), CI.getArgOperand(1),
                   CI.getArgOperand(2)};

  // If this isn't index form we need to swap operand 0 and 1.
  if (!IndexForm)
    std::swap(Args[0], Args[1]);

  Value *V = Builder.CreateIntrinsic(IID, {}, Args);
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
                             : Builder.CreateBitCast(CI.getArgOperand(1), Ty);
  return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}
static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
                                         Intrinsic::ID IID) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getOperand(0);
  Value *Op1 = CI.getOperand(1);
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});

  if (CI.arg_size() == 4) { // For masked intrinsics.
    Value *VecSrc = CI.getOperand(2);
    Value *Mask = CI.getOperand(3);
    Res = emitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}
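// Upgrade X86 rotate intrinsics to the generic funnel-shift intrinsics; a
// rotate is a funnel shift with both inputs equal. Illustrative example: a
// packed rotate-left of <4 x i32> by 5 becomes
// @llvm.fshl.v4i32(%x, %x, splat(i32 5)).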
static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
                               bool IsRotateRight) {
  Type *Ty = CI.getType();
  Value *Src = CI.getArgOperand(0);
  Value *Amt = CI.getArgOperand(1);

  // Amount may be a scalar immediate, in which case create a splat vector.
  // Funnel shift amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  }

  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});

  if (CI.arg_size() == 4) { // For masked intrinsics.
    Value *VecSrc = CI.getOperand(2);
    Value *Mask = CI.getOperand(3);
    Res = emitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}
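// Upgrade XOP vpcom/vpcomu intrinsics to a generic icmp + sext. Illustrative
// example: the signed "less than" form (Imm == 0) on <16 x i8> operands
// becomes sext(icmp slt %a, %b) to <16 x i8>.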
static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
                              bool IsSigned) {
  Type *Ty = CI.getType();
  Value *LHS = CI.getArgOperand(0);
  Value *RHS = CI.getArgOperand(1);

  CmpInst::Predicate Pred;
  switch (Imm) {
  case 0x0:
    Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
    break;
  case 0x1:
    Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
    break;
  case 0x2:
    Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
    break;
  case 0x3:
    Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
    break;
  case 0x4:
    Pred = ICmpInst::ICMP_EQ;
    break;
  case 0x5:
    Pred = ICmpInst::ICMP_NE;
    break;
  case 0x6:
    return Constant::getNullValue(Ty); // FALSE
  case 0x7:
    return Constant::getAllOnesValue(Ty); // TRUE
  default:
    llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
  }

  Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
  Value *Ext = Builder.CreateSExt(Cmp, Ty);
  return Ext;
}
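// Upgrade VSHLD/VSHRD concat-shift intrinsics to funnel shifts. For the
// right-shift form the two sources are swapped first so that the generic
// fshr concatenates them in the order the instruction expects.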
static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
                                    bool IsShiftRight, bool ZeroMask) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Amt = CI.getArgOperand(2);

  if (IsShiftRight)
    std::swap(Op0, Op1);

  // Amount may be a scalar immediate, in which case create a splat vector.
  // Funnel shift amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  }

  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});

  unsigned NumArgs = CI.arg_size();
  if (NumArgs >= 4) { // For masked intrinsics.
    Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3)
                    : ZeroMask   ? ConstantAggregateZero::get(CI.getType())
                                 : CI.getArgOperand(0);
    Value *Mask = CI.getOperand(NumArgs - 1);
    Res = emitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}
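// Upgrade legacy masked stores to @llvm.masked.store. The integer mask is
// converted to an <N x i1> predicate, and a constant all-ones mask degrades
// to a plain store at the appropriate alignment.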
static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
                                 Value *Mask, bool Aligned) {
  const Align Alignment =
      Aligned
          ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
          : Align(1);

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Data, Ptr, Alignment);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
}
static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
                                Value *Passthru, Value *Mask, bool Aligned) {
  Type *ValTy = Passthru->getType();
  const Align Alignment =
      Aligned
          ? Align(
                Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
                8)
          : Align(1);

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
}
static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getArgOperand(0);
  Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
                                       {Op0, Builder.getInt1(false)});
  if (CI.arg_size() == 3)
    Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
  return Res;
}
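// Upgrade PMULDQ/PMULUDQ intrinsics to a plain 64-bit vector multiply. The
// operands arrive as vXi32 values bitcast to vXi64; sign-extending (shl +
// ashr) or zero-extending (and with 0xffffffff) the low 32 bits of each lane
// reproduces the instruction's widening multiply.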
static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
  Type *Ty = CI.getType();

  // Arguments have a vXi32 type so cast to vXi64.
  Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
  Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);

  if (IsSigned) {
    // Shift left then arithmetic shift right.
    Constant *ShiftAmt = ConstantInt::get(Ty, 32);
    LHS = Builder.CreateShl(LHS, ShiftAmt);
    LHS = Builder.CreateAShr(LHS, ShiftAmt);
    RHS = Builder.CreateShl(RHS, ShiftAmt);
    RHS = Builder.CreateAShr(RHS, ShiftAmt);
  } else {
    // Clear the upper bits.
    Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
    LHS = Builder.CreateAnd(LHS, Mask);
    RHS = Builder.CreateAnd(RHS, Mask);
  }

  Value *Res = Builder.CreateMul(LHS, RHS);

  if (CI.arg_size() == 4)
    Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));

  return Res;
}
// Apply a mask to a vector of i1s and make sure the result is at least 8 bits
// wide.
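// Illustrative example: a <4 x i1> compare result is ANDed with the low four
// bits of its i8 mask, padded with zeroes up to <8 x i1>, and bitcast back to
// i8 so it matches the integer type the old k-mask intrinsics returned.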
static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask) {
  unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
  if (Mask) {
    const auto *C = dyn_cast<Constant>(Mask);
    if (!C || !C->isAllOnesValue())
      Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  }

  if (NumElts < 8) {
    int Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(
        Vec, Constant::getNullValue(Vec->getType()), Indices);
  }
  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
                                   unsigned CC, bool Signed) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();

  Value *Cmp;
  if (CC == 3) {
    Cmp = Constant::getNullValue(
        FixedVectorType::get(Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    Cmp = Constant::getAllOnesValue(
        FixedVectorType::get(Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ; break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE; break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
  }

  Value *Mask = CI.getArgOperand(CI.arg_size() - 1);

  return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
}
// Replace a masked intrinsic with an older unmasked intrinsic.
static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
                                    Intrinsic::ID IID) {
  Value *Rep = Builder.CreateIntrinsic(
      IID, {}, {CI.getArgOperand(0), CI.getArgOperand(1)});
  return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
}

static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
  Value *A = CI.getArgOperand(0);
  Value *B = CI.getArgOperand(1);
  Value *Src = CI.getArgOperand(2);
  Value *Mask = CI.getArgOperand(3);

  Value *AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
  Value *Cmp = Builder.CreateIsNotNull(AndNode);
  Value *Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
  Value *Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
  Value *Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
}
static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
  Value *Op = CI.getArgOperand(0);
  Type *ReturnOp = CI.getType();
  unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
}
// Replace intrinsic with unmasked version and a select.
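// Illustrative example: @llvm.x86.avx512.mask.max.ps.128(%a, %b, %src, i8 %m)
// becomes @llvm.x86.sse.max.ps(%a, %b) followed by a select between the
// result and %src under the mask %m.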
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
                                      CallBase &CI, Value *&Rep) {
  Name = Name.substr(12); // Remove avx512.mask.

  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (Name.starts_with("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.starts_with("permvar.")) {
    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("conflict.")) {
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pavg.")) {
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else
    return false;

  SmallVector<Value *, 4> Args(CI.args());
  Args.pop_back();
  Args.pop_back();
  Rep = Builder.CreateIntrinsic(IID, {}, Args);
  unsigned NumArgs = CI.arg_size();
  Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
                      CI.getArgOperand(NumArgs - 2));
  return true;
}
/// Upgrade comment in call to inline asm that represents an objc retain
/// release marker.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  size_t Pos;
  if (AsmStr->find("mov\tfp") == 0 &&
      AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
      (Pos = AsmStr->find("# marker")) != std::string::npos) {
    AsmStr->replace(Pos, 1, ";");
  }
}
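// Upgrade retired NVVM intrinsics to target-independent IR. Illustrative
// examples: @llvm.nvvm.abs.i(%x) becomes a compare-and-select against zero,
// and @llvm.nvvm.rotate.b32(%x, %n) becomes @llvm.fshl.i32(%x, %x, %n).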
static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
                                       Function *F, IRBuilder<> &Builder) {
  Value *Rep = nullptr;

  if (Name == "abs.i" || Name == "abs.ll") {
    Value *Arg = CI->getArgOperand(0);
    Value *Neg = Builder.CreateNeg(Arg, "neg");
    Value *Cmp = Builder.CreateICmpSGE(
        Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
    Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
  } else if (Name.starts_with("atomic.load.add.f32.p") ||
             Name.starts_with("atomic.load.add.f64.p")) {
    Value *Ptr = CI->getArgOperand(0);
    Value *Val = CI->getArgOperand(1);
    Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
                                  AtomicOrdering::SequentiallyConsistent);
  } else if (Name.consume_front("max.") &&
             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
              Name == "ui" || Name == "ull")) {
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    Value *Cmp = Name.starts_with("u")
                     ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
                     : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
  } else if (Name.consume_front("min.") &&
             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
              Name == "ui" || Name == "ull")) {
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    Value *Cmp = Name.starts_with("u")
                     ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
                     : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
  } else if (Name == "clz.ll") {
    // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
    Value *Arg = CI->getArgOperand(0);
    Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
                                          {Arg, Builder.getFalse()},
                                          /*FMFSource=*/nullptr, "ctlz");
    Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
  } else if (Name == "popc.ll") {
    // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
    Value *Arg = CI->getArgOperand(0);
    Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
                                          Arg, /*FMFSource=*/nullptr, "ctpop");
    Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
  } else if (Name == "h2f") {
    Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
                                  {Builder.getFloatTy()}, CI->getArgOperand(0),
                                  /*FMFSource=*/nullptr, "h2f");
  } else if (Name.consume_front("bitcast.") &&
             (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
              Name == "d2ll")) {
    Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
  } else if (Name == "rotate.b32") {
    Value *Arg = CI->getOperand(0);
    Value *ShiftAmt = CI->getOperand(1);
    Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
                                  {Arg, Arg, ShiftAmt});
  } else if (Name == "rotate.b64") {
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(0);
    Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "rotate.right.b64") {
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(0);
    Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if ((Name.consume_front("ptr.gen.to.") &&
              (Name.starts_with("local") || Name.starts_with("shared") ||
               Name.starts_with("global") || Name.starts_with("constant"))) ||
             (Name.consume_front("ptr.") &&
              (Name.consume_front("local") || Name.consume_front("shared") ||
               Name.consume_front("global") ||
               Name.consume_front("constant")) &&
              Name.starts_with(".to.gen"))) {
    Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
  } else if (Name.consume_front("ldg.global")) {
    Value *Ptr = CI->getArgOperand(0);
    Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
    // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
    Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
    Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
    MDNode *MD = MDNode::get(Builder.getContext(), {});
    LD->setMetadata(LLVMContext::MD_invariant_load, MD);
    return LD;
  } else {
    Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
    if (IID != Intrinsic::not_intrinsic &&
        !F->getReturnType()->getScalarType()->isBFloatTy()) {
      rename(F);
      Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
      SmallVector<Value *, 2> Args;
      for (size_t I = 0; I < NewFn->arg_size(); ++I) {
        Value *Arg = CI->getArgOperand(I);
        Type *OldType = Arg->getType();
        Type *NewType = NewFn->getArg(I)->getType();
        Args.push_back(
            (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
                ? Builder.CreateBitCast(Arg, NewType)
                : Arg);
      }
      Rep = Builder.CreateCall(NewFn, Args);
      if (F->getReturnType()->isIntegerTy())
        Rep = Builder.CreateBitCast(Rep, F->getReturnType());
    }
  }

  return Rep;
}
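// Expand retired x86 intrinsic calls to equivalent generic IR (or to newer
// x86 intrinsics) at the call site; Rep receives the replacement value built
// with Builder.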
static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
                                      IRBuilder<> &Builder) {
  LLVMContext &C = F->getContext();
  Value *Rep = nullptr;

  if (Name.starts_with("sse4a.movnt.")) {
    SmallVector<Metadata *, 1> Elts;
    Elts.push_back(
        ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
    MDNode *Node = MDNode::get(C, Elts);

    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);

    // Nontemporal (unaligned) store of the 0'th element of the float/double
    // vector.
    Value *Extract =
        Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");

    StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
    SI->setMetadata(LLVMContext::MD_nontemporal, Node);
  } else if (Name.starts_with("avx.movnt.") ||
             Name.starts_with("avx512.storent.")) {
    SmallVector<Metadata *, 1> Elts;
    Elts.push_back(
        ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
    MDNode *Node = MDNode::get(C, Elts);

    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);

    StoreInst *SI = Builder.CreateAlignedStore(
        Arg1, Arg0,
        Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
    SI->setMetadata(LLVMContext::MD_nontemporal, Node);
  } else if (Name == "sse2.storel.dq") {
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);

    auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
    Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
    Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
    Builder.CreateAlignedStore(Elt, Arg0, Align(1));
  } else if (Name.starts_with("sse.storeu.") ||
             Name.starts_with("sse2.storeu.") ||
             Name.starts_with("avx.storeu.")) {
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
  } else if (Name == "avx512.mask.store.ss") {
    Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
    upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
                       Mask, false);
  } else if (Name.starts_with("avx512.mask.store")) {
    // "avx512.mask.storeu." or "avx512.mask.store."
    bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
    upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
                       CI->getArgOperand(2), Aligned);
  } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
    // Upgrade packed integer vector compare intrinsics to compare
    // instructions: "sse2.pcmpeq.", "sse2.pcmpgt.", "avx2.pcmpeq." or
    // "avx2.pcmpgt.".
    bool CmpEq = Name[9] == 'e';
    Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
                             CI->getArgOperand(0), CI->getArgOperand(1));
    Rep = Builder.CreateSExt(Rep, CI->getType(), "");
  } else if (Name.starts_with("avx512.broadcastm")) {
    Type *ExtTy = Type::getInt32Ty(C);
    if (CI->getOperand(0)->getType()->isIntegerTy(8))
      ExtTy = Type::getInt64Ty(C);
    unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
                       ExtTy->getPrimitiveSizeInBits();
    Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
    Rep = Builder.CreateVectorSplat(NumElts, Rep);
  } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
    Value *Vec = CI->getArgOperand(0);
    Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
    Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
    Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
  } else if (Name.starts_with("avx.sqrt.p") ||
             Name.starts_with("sse2.sqrt.p") ||
             Name.starts_with("sse.sqrt.p")) {
    Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
                                  {CI->getArgOperand(0)});
  } else if (Name.starts_with("avx512.mask.sqrt.p")) {
    if (CI->arg_size() == 4 &&
        (!isa<ConstantInt>(CI->getArgOperand(3)) ||
         cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
      Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
                                          : Intrinsic::x86_avx512_sqrt_pd_512;

      Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
      Rep = Builder.CreateIntrinsic(IID, {}, Args);
    } else {
      Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
                                    {CI->getArgOperand(0)});
    }
    Rep =
        emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
  } else if (Name.starts_with("avx512.ptestm") ||
             Name.starts_with("avx512.ptestnm")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    Value *Mask = CI->getArgOperand(2);
    Rep = Builder.CreateAnd(Op0, Op1);
    llvm::Type *Ty = Op0->getType();
    Value *Zero = llvm::Constant::getNullValue(Ty);
    ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
                                   ? ICmpInst::ICMP_NE
                                   : ICmpInst::ICMP_EQ;
    Rep = Builder.CreateICmp(Pred, Rep, Zero);
    Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
  } else if (Name.starts_with("avx512.mask.pbroadcast")) {
    unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
                           ->getNumElements();
    Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
    Rep =
        emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
  } else if (Name.starts_with("avx512.kunpck")) {
    unsigned NumElts = CI->getType()->getScalarSizeInBits();
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
    int Indices[64];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;

    // First extract half of each vector. This gives better codegen than
    // doing it in a single shuffle.
    LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
    RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
    // Concat the vectors.
    // NOTE: Operands have to be swapped to match intrinsic definition.
    Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kand.w") {
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    Rep = Builder.CreateAnd(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kandn.w") {
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    LHS = Builder.CreateNot(LHS);
    Rep = Builder.CreateAnd(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kor.w") {
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    Rep = Builder.CreateOr(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kxor.w") {
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    Rep = Builder.CreateXor(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kxnor.w") {
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    LHS = Builder.CreateNot(LHS);
    Rep = Builder.CreateXor(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.knot.w") {
    Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Rep = Builder.CreateNot(Rep);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    Rep = Builder.CreateOr(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
    Value *C;
    if (Name[14] == 'c')
      C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
    else
      C = ConstantInt::getNullValue(Builder.getInt16Ty());
    Rep = Builder.CreateICmpEQ(Rep, C);
    Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
  } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
             Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
             Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
             Name == "sse.div.ss" || Name == "sse2.div.sd") {
    Type *I32Ty = Type::getInt32Ty(C);
    Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
                                               ConstantInt::get(I32Ty, 0));
    Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
                                               ConstantInt::get(I32Ty, 0));
    Value *EltOp;
    if (Name.contains(".add."))
      EltOp = Builder.CreateFAdd(Elt0, Elt1);
    else if (Name.contains(".sub."))
      EltOp = Builder.CreateFSub(Elt0, Elt1);
    else if (Name.contains(".mul."))
      EltOp = Builder.CreateFMul(Elt0, Elt1);
    else
      EltOp = Builder.CreateFDiv(Elt0, Elt1);
    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
                                      ConstantInt::get(I32Ty, 0));
  } else if (Name.starts_with("avx512.mask.pcmp")) {
    // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
    bool CmpEq = Name[16] == 'e';
    Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
  } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
    Type *OpTy = CI->getArgOperand(0)->getType();
    unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    switch (VecWidth) {
    default:
      llvm_unreachable("Unexpected intrinsic");
    case 128:
      IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
      break;
    case 256:
      IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
      break;
    case 512:
      IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
      break;
    }

    Rep = Builder.CreateIntrinsic(IID, {},
                                  {CI->getOperand(0), CI->getArgOperand(1)});
    Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.fpclass.p")) {
    Type *OpTy = CI->getArgOperand(0)->getType();
    unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
    unsigned EltWidth = OpTy->getScalarSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_fpclass_ps_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_fpclass_ps_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_fpclass_ps_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_fpclass_pd_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_fpclass_pd_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_fpclass_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Rep = Builder.CreateIntrinsic(IID, {},
                                  {CI->getOperand(0), CI->getArgOperand(1)});
    Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.cmp.p")) {
    SmallVector<Value *, 4> Args(CI->args());
    Type *OpTy = Args[0]->getType();
    unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
    unsigned EltWidth = OpTy->getScalarSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Mask = Constant::getAllOnesValue(CI->getType());
    if (VecWidth == 512)
      std::swap(Mask, Args.back());
    Args.push_back(Mask);

    Rep = Builder.CreateIntrinsic(IID, {}, Args);
  } else if (Name.starts_with("avx512.mask.cmp.")) {
    // Integer compare intrinsics.
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
  } else if (Name.starts_with("avx512.mask.ucmp.")) {
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
  } else if (Name.starts_with("avx512.cvtb2mask.") ||
             Name.starts_with("avx512.cvtw2mask.") ||
             Name.starts_with("avx512.cvtd2mask.") ||
             Name.starts_with("avx512.cvtq2mask.")) {
    Value *Op = CI->getArgOperand(0);
    Value *Zero = llvm::Constant::getNullValue(Op->getType());
    Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
    Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
  } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
             Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
             Name.starts_with("avx512.mask.pabs")) {
    Rep = upgradeAbs(Builder, *CI);
  } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
             Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
             Name.starts_with("avx512.mask.pmaxs")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
  } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
             Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
             Name.starts_with("avx512.mask.pmaxu")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
  } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
             Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
             Name.starts_with("avx512.mask.pmins")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
  } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
             Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
             Name.starts_with("avx512.mask.pminu")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
  } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
             Name == "avx512.pmulu.dq.512" ||
             Name.starts_with("avx512.mask.pmulu.dq.")) {
    Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
  } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
             Name == "avx512.pmul.dq.512" ||
             Name.starts_with("avx512.mask.pmul.dq.")) {
    Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
  } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
             Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
    Rep =
        Builder.CreateSIToFP(CI->getArgOperand(1),
                             cast<VectorType>(CI->getType())->getElementType());
    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
  } else if (Name == "avx512.cvtusi2sd") {
    Rep =
        Builder.CreateUIToFP(CI->getArgOperand(1),
                             cast<VectorType>(CI->getType())->getElementType());
    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
  } else if (Name == "sse2.cvtss2sd") {
    Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
    Rep = Builder.CreateFPExt(
        Rep, cast<VectorType>(CI->getType())->getElementType());
    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
  } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
             Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
             Name.starts_with("avx512.mask.cvtdq2pd.") ||
             Name.starts_with("avx512.mask.cvtudq2pd.") ||
             Name.starts_with("avx512.mask.cvtdq2ps.") ||
             Name.starts_with("avx512.mask.cvtudq2ps.") ||
             Name.starts_with("avx512.mask.cvtqq2pd.") ||
             Name.starts_with("avx512.mask.cvtuqq2pd.") ||
             Name == "avx512.mask.cvtqq2ps.256" ||
             Name == "avx512.mask.cvtqq2ps.512" ||
             Name == "avx512.mask.cvtuqq2ps.256" ||
             Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
             Name == "avx.cvt.ps2.pd.256" ||
             Name == "avx512.mask.cvtps2pd.128" ||
             Name == "avx512.mask.cvtps2pd.256") {
    auto *DstTy = cast<FixedVectorType>(CI->getType());
    Rep = CI->getArgOperand(0);
    auto *SrcTy = cast<FixedVectorType>(Rep->getType());

    unsigned NumDstElts = DstTy->getNumElements();
    if (NumDstElts < SrcTy->getNumElements()) {
      assert(NumDstElts == 2 && "Unexpected vector size");
      Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
    }

    bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
    bool IsUnsigned = Name.contains("cvtu");
    if (IsPS2PD)
      Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
    else if (CI->arg_size() == 4 &&
             (!isa<ConstantInt>(CI->getArgOperand(3)) ||
              cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
      Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
                                     : Intrinsic::x86_avx512_sitofp_round;
      Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
                                    {Rep, CI->getArgOperand(3)});
    } else {
      Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
                       : Builder.CreateSIToFP(Rep, DstTy, "cvt");
    }

    if (CI->arg_size() >= 3)
      Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
  } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
             Name.starts_with("vcvtph2ps.")) {
    auto *DstTy = cast<FixedVectorType>(CI->getType());
    Rep = CI->getArgOperand(0);
    auto *SrcTy = cast<FixedVectorType>(Rep->getType());
    unsigned NumDstElts = DstTy->getNumElements();
    if (NumDstElts != SrcTy->getNumElements()) {
      assert(NumDstElts == 4 && "Unexpected vector size");
      Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
    }
    Rep = Builder.CreateBitCast(
        Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
    Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
    if (CI->arg_size() >= 3)
      Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
  } else if (Name.starts_with("avx512.mask.load")) {
    // "avx512.mask.loadu." or "avx512.mask.load."
    bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
    Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
                            CI->getArgOperand(2), Aligned);
  } else if (Name.starts_with("avx512.mask.expand.load.")) {
    auto *ResultTy = cast<FixedVectorType>(CI->getType());
    Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                   ResultTy->getNumElements());

    Rep = Builder.CreateIntrinsic(
        Intrinsic::masked_expandload, ResultTy,
        {CI->getOperand(0), MaskVec, CI->getOperand(1)});
  } else if (Name.starts_with("avx512.mask.compress.store.")) {
    auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
    Value *MaskVec =
        getX86MaskVec(Builder, CI->getArgOperand(2),
                      cast<FixedVectorType>(ResultTy)->getNumElements());

    Rep = Builder.CreateIntrinsic(
        Intrinsic::masked_compressstore, ResultTy,
        {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
  } else if (Name.starts_with("avx512.mask.compress.") ||
             Name.starts_with("avx512.mask.expand.")) {
    auto *ResultTy = cast<FixedVectorType>(CI->getType());

    Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                   ResultTy->getNumElements());

    bool IsCompress = Name[12] == 'c';
    Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
                                   : Intrinsic::x86_avx512_mask_expand;
    Rep = Builder.CreateIntrinsic(
        IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
2836 } else if (Name.starts_with("xop.vpcom")) {
2837 bool IsSigned;
2838 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
2839 Name.ends_with("uq"))
2840 IsSigned = false;
2841 else if (Name.ends_with("b") || Name.ends_with("w") ||
2842 Name.ends_with("d") || Name.ends_with("q"))
2843 IsSigned = true;
2844 else
2845 llvm_unreachable("Unknown suffix");
2847 unsigned Imm;
2848 if (CI->arg_size() == 3) {
2849 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2850 } else {
2851 Name = Name.substr(9); // strip off "xop.vpcom"
2852 if (Name.starts_with("lt"))
2853 Imm = 0;
2854 else if (Name.starts_with("le"))
2855 Imm = 1;
2856 else if (Name.starts_with("gt"))
2857 Imm = 2;
2858 else if (Name.starts_with("ge"))
2859 Imm = 3;
2860 else if (Name.starts_with("eq"))
2861 Imm = 4;
2862 else if (Name.starts_with("ne"))
2863 Imm = 5;
2864 else if (Name.starts_with("false"))
2865 Imm = 6;
2866 else if (Name.starts_with("true"))
2867 Imm = 7;
2868 else
2869 llvm_unreachable("Unknown condition");
2870 }
2872 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2873 } else if (Name.starts_with("xop.vpcmov")) {
2874 Value *Sel = CI->getArgOperand(2);
2875 Value *NotSel = Builder.CreateNot(Sel);
2876 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2877 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2878 Rep = Builder.CreateOr(Sel0, Sel1);
2879 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
2880 Name.starts_with("avx512.mask.prol")) {
2881 Rep = upgradeX86Rotate(Builder, *CI, false);
2882 } else if (Name.starts_with("avx512.pror") ||
2883 Name.starts_with("avx512.mask.pror")) {
2884 Rep = upgradeX86Rotate(Builder, *CI, true);
2885 } else if (Name.starts_with("avx512.vpshld.") ||
2886 Name.starts_with("avx512.mask.vpshld") ||
2887 Name.starts_with("avx512.maskz.vpshld")) {
2888 bool ZeroMask = Name[11] == 'z';
2889 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2890 } else if (Name.starts_with("avx512.vpshrd.") ||
2891 Name.starts_with("avx512.mask.vpshrd") ||
2892 Name.starts_with("avx512.maskz.vpshrd")) {
2893 bool ZeroMask = Name[11] == 'z';
2894 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2895 } else if (Name == "sse42.crc32.64.8") {
2896 Value *Trunc0 =
2897 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2898 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8, {},
2899 {Trunc0, CI->getArgOperand(1)});
2900 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2901 } else if (Name.starts_with("avx.vbroadcast.s") ||
2902 Name.starts_with("avx512.vbroadcast.s")) {
2903 // Replace broadcasts with a series of insertelements.
2904 auto *VecTy = cast<FixedVectorType>(CI->getType());
2905 Type *EltTy = VecTy->getElementType();
2906 unsigned EltNum = VecTy->getNumElements();
2907 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
2908 Type *I32Ty = Type::getInt32Ty(C);
2909 Rep = PoisonValue::get(VecTy);
2910 for (unsigned I = 0; I < EltNum; ++I)
2911 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
2912 } else if (Name.starts_with("sse41.pmovsx") ||
2913 Name.starts_with("sse41.pmovzx") ||
2914 Name.starts_with("avx2.pmovsx") ||
2915 Name.starts_with("avx2.pmovzx") ||
2916 Name.starts_with("avx512.mask.pmovsx") ||
2917 Name.starts_with("avx512.mask.pmovzx")) {
2918 auto *DstTy = cast<FixedVectorType>(CI->getType());
2919 unsigned NumDstElts = DstTy->getNumElements();
2921 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2922 SmallVector<int, 8> ShuffleMask(NumDstElts);
2923 for (unsigned i = 0; i != NumDstElts; ++i)
2924 ShuffleMask[i] = i;
2926 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2928 bool DoSext = Name.contains("pmovsx");
2929 Rep =
2930 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
2931 // If there are 3 arguments, it's a masked intrinsic so we need a select.
2932 if (CI->arg_size() == 3)
2933 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2934 CI->getArgOperand(1));
2935 } else if (Name == "avx512.mask.pmov.qd.256" ||
2936 Name == "avx512.mask.pmov.qd.512" ||
2937 Name == "avx512.mask.pmov.wb.256" ||
2938 Name == "avx512.mask.pmov.wb.512") {
2939 Type *Ty = CI->getArgOperand(1)->getType();
2940 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2941 Rep =
2942 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2943 } else if (Name.starts_with("avx.vbroadcastf128") ||
2944 Name == "avx2.vbroadcasti128") {
2945 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2946 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2947 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2948 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2949 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
2950 if (NumSrcElts == 2)
2951 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2952 else
2953 Rep = Builder.CreateShuffleVector(Load,
2954 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2955 } else if (Name.starts_with("avx512.mask.shuf.i") ||
2956 Name.starts_with("avx512.mask.shuf.f")) {
2957 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2958 Type *VT = CI->getType();
2959 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2960 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2961 unsigned ControlBitsMask = NumLanes - 1;
2962 unsigned NumControlBits = NumLanes / 2;
2963 SmallVector<int, 8> ShuffleMask(0);
2965 for (unsigned l = 0; l != NumLanes; ++l) {
2966 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2967 // We actually need the other source.
2968 if (l >= NumLanes / 2)
2969 LaneMask += NumLanes;
2970 for (unsigned i = 0; i != NumElementsInLane; ++i)
2971 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2972 }
2973 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2974 CI->getArgOperand(1), ShuffleMask);
2975 Rep =
2976 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
2977 } else if (Name.starts_with("avx512.mask.broadcastf") ||
2978 Name.starts_with("avx512.mask.broadcasti")) {
2979 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2980 ->getNumElements();
2981 unsigned NumDstElts =
2982 cast<FixedVectorType>(CI->getType())->getNumElements();
2984 SmallVector<int, 8> ShuffleMask(NumDstElts);
2985 for (unsigned i = 0; i != NumDstElts; ++i)
2986 ShuffleMask[i] = i % NumSrcElts;
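// Illustrative example: broadcasting a 4-element source into a 16-element
// destination produces the mask {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3}.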
2988 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2989 CI->getArgOperand(0), ShuffleMask);
2990 Rep =
2991 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2992 } else if (Name.starts_with("avx2.pbroadcast") ||
2993 Name.starts_with("avx2.vbroadcast") ||
2994 Name.starts_with("avx512.pbroadcast") ||
2995 Name.starts_with("avx512.mask.broadcast.s")) {
2996 // Replace vp?broadcasts with a vector shuffle.
2997 Value *Op = CI->getArgOperand(0);
2998 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2999 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3000 SmallVector<int, 8> M;
3001 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
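// A null mask constant is all zeroes, so every destination lane selects
// element 0 of the source, i.e. a splat of the first element.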
3002 Rep = Builder.CreateShuffleVector(Op, M);
3004 if (CI->arg_size() == 3)
3005 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3006 CI->getArgOperand(1));
3007 } else if (Name.starts_with("sse2.padds.") ||
3008 Name.starts_with("avx2.padds.") ||
3009 Name.starts_with("avx512.padds.") ||
3010 Name.starts_with("avx512.mask.padds.")) {
3011 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3012 } else if (Name.starts_with("sse2.psubs.") ||
3013 Name.starts_with("avx2.psubs.") ||
3014 Name.starts_with("avx512.psubs.") ||
3015 Name.starts_with("avx512.mask.psubs.")) {
3016 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3017 } else if (Name.starts_with("sse2.paddus.") ||
3018 Name.starts_with("avx2.paddus.") ||
3019 Name.starts_with("avx512.mask.paddus.")) {
3020 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3021 } else if (Name.starts_with("sse2.psubus.") ||
3022 Name.starts_with("avx2.psubus.") ||
3023 Name.starts_with("avx512.mask.psubus.")) {
3024 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3025 } else if (Name.starts_with("avx512.mask.palignr.")) {
3026 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3027 CI->getArgOperand(1), CI->getArgOperand(2),
3028 CI->getArgOperand(3), CI->getArgOperand(4),
3029 false);
3030 } else if (Name.starts_with("avx512.mask.valign.")) {
3031 Rep = upgradeX86ALIGNIntrinsics(
3032 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3033 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3034 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3035 // 128/256-bit shift left specified in bits.
3036 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3037 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3038 Shift / 8); // Shift is in bits.
3039 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3040 // 128/256-bit shift right specified in bits.
3041 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3042 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3043 Shift / 8); // Shift is in bits.
3044 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3045 Name == "avx512.psll.dq.512") {
3046 // 128/256/512-bit shift left specified in bytes.
3047 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3048 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3049 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3050 Name == "avx512.psrl.dq.512") {
3051 // 128/256/512-bit shift right specified in bytes.
3052 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3053 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3054 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3055 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3056 Name.starts_with("avx2.pblendd.")) {
3057 Value *Op0 = CI->getArgOperand(0);
3058 Value *Op1 = CI->getArgOperand(1);
3059 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3060 auto *VecTy = cast<FixedVectorType>(CI->getType());
3061 unsigned NumElts = VecTy->getNumElements();
3063 SmallVector<int, 16> Idxs(NumElts);
3064 for (unsigned i = 0; i != NumElts; ++i)
3065 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
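// Illustrative example: an 8 x i16 pblendw with Imm = 0xF0 builds
// {0, 1, 2, 3, 12, 13, 14, 15}: the low four lanes come from Op0 and the
// high four from Op1. The (i % 8) wrap repeats the immediate per 128-bit
// lane for the 16 x i16 avx2.pblendw form.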
3067 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3068 } else if (Name.starts_with("avx.vinsertf128.") ||
3069 Name == "avx2.vinserti128" ||
3070 Name.starts_with("avx512.mask.insert")) {
3071 Value *Op0 = CI->getArgOperand(0);
3072 Value *Op1 = CI->getArgOperand(1);
3073 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3074 unsigned DstNumElts =
3075 cast<FixedVectorType>(CI->getType())->getNumElements();
3076 unsigned SrcNumElts =
3077 cast<FixedVectorType>(Op1->getType())->getNumElements();
3078 unsigned Scale = DstNumElts / SrcNumElts;
3080 // Mask off the high bits of the immediate value; hardware ignores those.
3081 Imm = Imm % Scale;
3083 // Extend the second operand into a vector the size of the destination.
3084 SmallVector<int, 8> Idxs(DstNumElts);
3085 for (unsigned i = 0; i != SrcNumElts; ++i)
3086 Idxs[i] = i;
3087 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3088 Idxs[i] = SrcNumElts;
3089 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3091 // Insert the second operand into the first operand.
3093 // Note that there is no guarantee that instruction lowering will actually
3094 // produce a vinsertf128 instruction for the created shuffles. In
3095 // particular, the 0 immediate case involves no lane changes, so it can
3096 // be handled as a blend.
3098 // Example of shuffle mask for 32-bit elements:
3099 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3100 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
3102 // First fill with an identity mask.
3103 for (unsigned i = 0; i != DstNumElts; ++i)
3104 Idxs[i] = i;
3105 // Then replace the elements where we need to insert.
3106 for (unsigned i = 0; i != SrcNumElts; ++i)
3107 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3108 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3110 // If the intrinsic has a mask operand, handle that.
3111 if (CI->arg_size() == 5)
3112 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3113 CI->getArgOperand(3));
3114 } else if (Name.starts_with("avx.vextractf128.") ||
3115 Name == "avx2.vextracti128" ||
3116 Name.starts_with("avx512.mask.vextract")) {
3117 Value *Op0 = CI->getArgOperand(0);
3118 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3119 unsigned DstNumElts =
3120 cast<FixedVectorType>(CI->getType())->getNumElements();
3121 unsigned SrcNumElts =
3122 cast<FixedVectorType>(Op0->getType())->getNumElements();
3123 unsigned Scale = SrcNumElts / DstNumElts;
3125 // Mask off the high bits of the immediate value; hardware ignores those.
3126 Imm = Imm % Scale;
3128 // Get indexes for the subvector of the input vector.
3129 SmallVector<int, 8> Idxs(DstNumElts);
3130 for (unsigned i = 0; i != DstNumElts; ++i) {
3131 Idxs[i] = i + (Imm * DstNumElts);
3132 }
3133 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3135 // If the intrinsic has a mask operand, handle that.
3136 if (CI->arg_size() == 4)
3137 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3138 CI->getArgOperand(2));
3139 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3140 Name.starts_with("avx512.mask.perm.di.")) {
3141 Value *Op0 = CI->getArgOperand(0);
3142 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3143 auto *VecTy = cast<FixedVectorType>(CI->getType());
3144 unsigned NumElts = VecTy->getNumElements();
3146 SmallVector<int, 8> Idxs(NumElts);
3147 for (unsigned i = 0; i != NumElts; ++i)
3148 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
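// Illustrative example: Imm = 0x1B (0b00011011) reverses each group of
// four elements, e.g. {3, 2, 1, 0, 7, 6, 5, 4} for an 8-element vector.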
3150 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3152 if (CI->arg_size() == 4)
3153 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3154 CI->getArgOperand(2));
3155 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3156 // The immediate permute control byte looks like this:
3157 // [1:0] - select 128 bits from sources for low half of destination
3158 // [2] - ignore
3159 // [3] - zero low half of destination
3160 // [5:4] - select 128 bits from sources for high half of destination
3161 // [6] - ignore
3162 // [7] - zero high half of destination
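// Illustrative example for 8 x float: Imm = 0x21 places the high half of
// the first source in the low half of the result ([1:0] = 1) and the low
// half of the second source in the high half ([5:4] = 2), i.e. the shuffle
// mask {4, 5, 6, 7, 8, 9, 10, 11} below.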
3164 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3166 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3167 unsigned HalfSize = NumElts / 2;
3168 SmallVector<int, 8> ShuffleMask(NumElts);
3170 // Determine which operand(s) are actually in use for this instruction.
3171 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3172 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3174 // If needed, replace operands based on zero mask.
3175 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3176 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3178 // Permute low half of result.
3179 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3180 for (unsigned i = 0; i < HalfSize; ++i)
3181 ShuffleMask[i] = StartIndex + i;
3183 // Permute high half of result.
3184 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3185 for (unsigned i = 0; i < HalfSize; ++i)
3186 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3188 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3190 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3191 Name.starts_with("avx512.mask.vpermil.p") ||
3192 Name.starts_with("avx512.mask.pshuf.d.")) {
3193 Value *Op0 = CI->getArgOperand(0);
3194 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3195 auto *VecTy = cast<FixedVectorType>(CI->getType());
3196 unsigned NumElts = VecTy->getNumElements();
3197 // Calculate the size of each index in the immediate.
3198 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3199 unsigned IdxMask = ((1 << IdxSize) - 1);
3201 SmallVector<int, 8> Idxs(NumElts);
3202 // Look up the bits for this element, wrapping around the immediate every
3203 // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
3204 // to offset by the first index of each group.
3205 for (unsigned i = 0; i != NumElts; ++i)
3206 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
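// Illustrative example: for a 4 x i32 pshufd, IdxSize is 2 and Imm = 0x1B
// decodes to {3, 2, 1, 0}; for vpermilpd, IdxSize is 1 and each element
// consumes one immediate bit while staying within its 2-element group.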
3208 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3210 if (CI->arg_size() == 4)
3211 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3212 CI->getArgOperand(2));
3213 } else if (Name == "sse2.pshufl.w" ||
3214 Name.starts_with("avx512.mask.pshufl.w.")) {
3215 Value *Op0 = CI->getArgOperand(0);
3216 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3217 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3219 SmallVector<int, 16> Idxs(NumElts);
3220 for (unsigned l = 0; l != NumElts; l += 8) {
3221 for (unsigned i = 0; i != 4; ++i)
3222 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3223 for (unsigned i = 4; i != 8; ++i)
3224 Idxs[i + l] = i + l;
3225 }
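// Illustrative example: Imm = 0x1B reverses the low four words of each
// 128-bit lane, e.g. {3, 2, 1, 0, 4, 5, 6, 7} for an 8 x i16 vector.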
3227 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3229 if (CI->arg_size() == 4)
3230 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3231 CI->getArgOperand(2));
3232 } else if (Name == "sse2.pshufh.w" ||
3233 Name.starts_with("avx512.mask.pshufh.w.")) {
3234 Value *Op0 = CI->getArgOperand(0);
3235 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3236 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3238 SmallVector<int, 16> Idxs(NumElts);
3239 for (unsigned l = 0; l != NumElts; l += 8) {
3240 for (unsigned i = 0; i != 4; ++i)
3241 Idxs[i + l] = i + l;
3242 for (unsigned i = 0; i != 4; ++i)
3243 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3244 }
3246 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3248 if (CI->arg_size() == 4)
3249 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3250 CI->getArgOperand(2));
3251 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3252 Value *Op0 = CI->getArgOperand(0);
3253 Value *Op1 = CI->getArgOperand(1);
3254 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3255 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3257 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3258 unsigned HalfLaneElts = NumLaneElts / 2;
3260 SmallVector<int, 16> Idxs(NumElts);
3261 for (unsigned i = 0; i != NumElts; ++i) {
3262 // Base index is the starting element of the lane.
3263 Idxs[i] = i - (i % NumLaneElts);
3264 // If we are halfway through the lane, switch to the other source.
3265 if ((i % NumLaneElts) >= HalfLaneElts)
3266 Idxs[i] += NumElts;
3267 // Now select the specific element by adding HalfLaneElts bits from
3268 // the immediate, wrapping around the immediate every 8 bits.
3269 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3270 }
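// Illustrative example: a 4 x float shufps with Imm = 0x1B yields
// {3, 2, 5, 4}, i.e. elements 3 and 2 of Op0 followed by elements 1 and 0
// of Op1.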
3272 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3274 Rep =
3275 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3276 } else if (Name.starts_with("avx512.mask.movddup") ||
3277 Name.starts_with("avx512.mask.movshdup") ||
3278 Name.starts_with("avx512.mask.movsldup")) {
3279 Value *Op0 = CI->getArgOperand(0);
3280 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3281 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3283 unsigned Offset = 0;
3284 if (Name.starts_with("avx512.mask.movshdup."))
3285 Offset = 1;
3287 SmallVector<int, 16> Idxs(NumElts);
3288 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3289 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3290 Idxs[i + l + 0] = i + l + Offset;
3291 Idxs[i + l + 1] = i + l + Offset;
3292 }
3294 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3296 Rep =
3297 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3298 } else if (Name.starts_with("avx512.mask.punpckl") ||
3299 Name.starts_with("avx512.mask.unpckl.")) {
3300 Value *Op0 = CI->getArgOperand(0);
3301 Value *Op1 = CI->getArgOperand(1);
3302 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3303 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3305 SmallVector<int, 64> Idxs(NumElts);
3306 for (int l = 0; l != NumElts; l += NumLaneElts)
3307 for (int i = 0; i != NumLaneElts; ++i)
3308 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
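// Illustrative example: for 4 x i32 this builds {0, 4, 1, 5}, interleaving
// the low halves of the two sources within each 128-bit lane.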
3310 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3312 Rep =
3313 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3314 } else if (Name.starts_with("avx512.mask.punpckh") ||
3315 Name.starts_with("avx512.mask.unpckh.")) {
3316 Value *Op0 = CI->getArgOperand(0);
3317 Value *Op1 = CI->getArgOperand(1);
3318 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3319 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3321 SmallVector<int, 64> Idxs(NumElts);
3322 for (int l = 0; l != NumElts; l += NumLaneElts)
3323 for (int i = 0; i != NumLaneElts; ++i)
3324 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3326 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3328 Rep =
3329 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3330 } else if (Name.starts_with("avx512.mask.and.") ||
3331 Name.starts_with("avx512.mask.pand.")) {
3332 VectorType *FTy = cast<VectorType>(CI->getType());
3333 VectorType *ITy = VectorType::getInteger(FTy);
3334 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3335 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3336 Rep = Builder.CreateBitCast(Rep, FTy);
3337 Rep =
3338 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3339 } else if (Name.starts_with("avx512.mask.andn.") ||
3340 Name.starts_with("avx512.mask.pandn.")) {
3341 VectorType *FTy = cast<VectorType>(CI->getType());
3342 VectorType *ITy = VectorType::getInteger(FTy);
3343 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3344 Rep = Builder.CreateAnd(Rep,
3345 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3346 Rep = Builder.CreateBitCast(Rep, FTy);
3347 Rep =
3348 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3349 } else if (Name.starts_with("avx512.mask.or.") ||
3350 Name.starts_with("avx512.mask.por.")) {
3351 VectorType *FTy = cast<VectorType>(CI->getType());
3352 VectorType *ITy = VectorType::getInteger(FTy);
3353 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3354 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3355 Rep = Builder.CreateBitCast(Rep, FTy);
3356 Rep =
3357 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3358 } else if (Name.starts_with("avx512.mask.xor.") ||
3359 Name.starts_with("avx512.mask.pxor.")) {
3360 VectorType *FTy = cast<VectorType>(CI->getType());
3361 VectorType *ITy = VectorType::getInteger(FTy);
3362 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3363 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3364 Rep = Builder.CreateBitCast(Rep, FTy);
3365 Rep =
3366 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3367 } else if (Name.starts_with("avx512.mask.padd.")) {
3368 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3369 Rep =
3370 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3371 } else if (Name.starts_with("avx512.mask.psub.")) {
3372 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3373 Rep =
3374 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3375 } else if (Name.starts_with("avx512.mask.pmull.")) {
3376 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3377 Rep =
3378 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3379 } else if (Name.starts_with("avx512.mask.add.p")) {
3380 if (Name.ends_with(".512")) {
3381 Intrinsic::ID IID;
3382 if (Name[17] == 's')
3383 IID = Intrinsic::x86_avx512_add_ps_512;
3384 else
3385 IID = Intrinsic::x86_avx512_add_pd_512;
3387 Rep = Builder.CreateIntrinsic(
3388 IID, {},
3389 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3390 } else {
3391 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3392 }
3393 Rep =
3394 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3395 } else if (Name.starts_with("avx512.mask.div.p")) {
3396 if (Name.ends_with(".512")) {
3397 Intrinsic::ID IID;
3398 if (Name[17] == 's')
3399 IID = Intrinsic::x86_avx512_div_ps_512;
3400 else
3401 IID = Intrinsic::x86_avx512_div_pd_512;
3403 Rep = Builder.CreateIntrinsic(
3404 IID, {},
3405 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3406 } else {
3407 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3408 }
3409 Rep =
3410 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3411 } else if (Name.starts_with("avx512.mask.mul.p")) {
3412 if (Name.ends_with(".512")) {
3413 Intrinsic::ID IID;
3414 if (Name[17] == 's')
3415 IID = Intrinsic::x86_avx512_mul_ps_512;
3416 else
3417 IID = Intrinsic::x86_avx512_mul_pd_512;
3419 Rep = Builder.CreateIntrinsic(
3420 IID, {},
3421 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3422 } else {
3423 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3424 }
3425 Rep =
3426 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3427 } else if (Name.starts_with("avx512.mask.sub.p")) {
3428 if (Name.ends_with(".512")) {
3429 Intrinsic::ID IID;
3430 if (Name[17] == 's')
3431 IID = Intrinsic::x86_avx512_sub_ps_512;
3432 else
3433 IID = Intrinsic::x86_avx512_sub_pd_512;
3435 Rep = Builder.CreateIntrinsic(
3436 IID, {},
3437 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3438 } else {
3439 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3440 }
3441 Rep =
3442 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3443 } else if ((Name.starts_with("avx512.mask.max.p") ||
3444 Name.starts_with("avx512.mask.min.p")) &&
3445 Name.drop_front(18) == ".512") {
3446 bool IsDouble = Name[17] == 'd';
3447 bool IsMin = Name[13] == 'i';
3448 static const Intrinsic::ID MinMaxTbl[2][2] = {
3449 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3450 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3451 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3453 Rep = Builder.CreateIntrinsic(
3454 IID, {},
3455 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3456 Rep =
3457 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3458 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3459 Rep =
3460 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3461 {CI->getArgOperand(0), Builder.getInt1(false)});
3462 Rep =
3463 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3464 } else if (Name.starts_with("avx512.mask.psll")) {
3465 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3466 bool IsVariable = Name[16] == 'v';
3467 char Size = Name[16] == '.' ? Name[17]
3468 : Name[17] == '.' ? Name[18]
3469 : Name[18] == '.' ? Name[19]
3470 : Name[20];
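// Illustrative examples: "avx512.mask.psll.d.128" has Name[16] == '.' and
// Size == 'd'; "avx512.mask.pslli.q.256" has Name[16] == 'i' (immediate
// form) and Size == Name[18] == 'q'.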
3472 Intrinsic::ID IID;
3473 if (IsVariable && Name[17] != '.') {
3474 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3475 IID = Intrinsic::x86_avx2_psllv_q;
3476 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3477 IID = Intrinsic::x86_avx2_psllv_q_256;
3478 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3479 IID = Intrinsic::x86_avx2_psllv_d;
3480 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3481 IID = Intrinsic::x86_avx2_psllv_d_256;
3482 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3483 IID = Intrinsic::x86_avx512_psllv_w_128;
3484 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3485 IID = Intrinsic::x86_avx512_psllv_w_256;
3486 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3487 IID = Intrinsic::x86_avx512_psllv_w_512;
3488 else
3489 llvm_unreachable("Unexpected size");
3490 } else if (Name.ends_with(".128")) {
3491 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3492 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3493 : Intrinsic::x86_sse2_psll_d;
3494 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3495 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3496 : Intrinsic::x86_sse2_psll_q;
3497 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3498 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3499 : Intrinsic::x86_sse2_psll_w;
3500 else
3501 llvm_unreachable("Unexpected size");
3502 } else if (Name.ends_with(".256")) {
3503 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3504 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3505 : Intrinsic::x86_avx2_psll_d;
3506 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3507 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3508 : Intrinsic::x86_avx2_psll_q;
3509 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3510 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3511 : Intrinsic::x86_avx2_psll_w;
3512 else
3513 llvm_unreachable("Unexpected size");
3514 } else {
3515 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3516 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3517 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3518 : Intrinsic::x86_avx512_psll_d_512;
3519 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3520 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3521 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3522 : Intrinsic::x86_avx512_psll_q_512;
3523 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3524 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3525 : Intrinsic::x86_avx512_psll_w_512;
3526 else
3527 llvm_unreachable("Unexpected size");
3528 }
3530 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3531 } else if (Name.starts_with("avx512.mask.psrl")) {
3532 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3533 bool IsVariable = Name[16] == 'v';
3534 char Size = Name[16] == '.' ? Name[17]
3535 : Name[17] == '.' ? Name[18]
3536 : Name[18] == '.' ? Name[19]
3537 : Name[20];
3539 Intrinsic::ID IID;
3540 if (IsVariable && Name[17] != '.') {
3541 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3542 IID = Intrinsic::x86_avx2_psrlv_q;
3543 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3544 IID = Intrinsic::x86_avx2_psrlv_q_256;
3545 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3546 IID = Intrinsic::x86_avx2_psrlv_d;
3547 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3548 IID = Intrinsic::x86_avx2_psrlv_d_256;
3549 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3550 IID = Intrinsic::x86_avx512_psrlv_w_128;
3551 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3552 IID = Intrinsic::x86_avx512_psrlv_w_256;
3553 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3554 IID = Intrinsic::x86_avx512_psrlv_w_512;
3555 else
3556 llvm_unreachable("Unexpected size");
3557 } else if (Name.ends_with(".128")) {
3558 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3559 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3560 : Intrinsic::x86_sse2_psrl_d;
3561 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3562 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3563 : Intrinsic::x86_sse2_psrl_q;
3564 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3565 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3566 : Intrinsic::x86_sse2_psrl_w;
3567 else
3568 llvm_unreachable("Unexpected size");
3569 } else if (Name.ends_with(".256")) {
3570 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3571 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3572 : Intrinsic::x86_avx2_psrl_d;
3573 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3574 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3575 : Intrinsic::x86_avx2_psrl_q;
3576 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3577 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3578 : Intrinsic::x86_avx2_psrl_w;
3579 else
3580 llvm_unreachable("Unexpected size");
3581 } else {
3582 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
3583 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3584 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3585 : Intrinsic::x86_avx512_psrl_d_512;
3586 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
3587 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3588 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3589 : Intrinsic::x86_avx512_psrl_q_512;
3590 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3591 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3592 : Intrinsic::x86_avx512_psrl_w_512;
3593 else
3594 llvm_unreachable("Unexpected size");
3595 }
3597 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3598 } else if (Name.starts_with("avx512.mask.psra")) {
3599 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3600 bool IsVariable = Name[16] == 'v';
3601 char Size = Name[16] == '.' ? Name[17]
3602 : Name[17] == '.' ? Name[18]
3603 : Name[18] == '.' ? Name[19]
3604 : Name[20];
3606 Intrinsic::ID IID;
3607 if (IsVariable && Name[17] != '.') {
3608 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3609 IID = Intrinsic::x86_avx2_psrav_d;
3610 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3611 IID = Intrinsic::x86_avx2_psrav_d_256;
3612 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3613 IID = Intrinsic::x86_avx512_psrav_w_128;
3614 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3615 IID = Intrinsic::x86_avx512_psrav_w_256;
3616 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3617 IID = Intrinsic::x86_avx512_psrav_w_512;
3618 else
3619 llvm_unreachable("Unexpected size");
3620 } else if (Name.ends_with(".128")) {
3621 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3622 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3623 : Intrinsic::x86_sse2_psra_d;
3624 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3625 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3626 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3627 : Intrinsic::x86_avx512_psra_q_128;
3628 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3629 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3630 : Intrinsic::x86_sse2_psra_w;
3631 else
3632 llvm_unreachable("Unexpected size");
3633 } else if (Name.ends_with(".256")) {
3634 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3635 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3636 : Intrinsic::x86_avx2_psra_d;
3637 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3638 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3639 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3640 : Intrinsic::x86_avx512_psra_q_256;
3641 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3642 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3643 : Intrinsic::x86_avx2_psra_w;
3644 else
3645 llvm_unreachable("Unexpected size");
3646 } else {
3647 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3648 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
3649 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3650 : Intrinsic::x86_avx512_psra_d_512;
3651 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3652 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
3653 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3654 : Intrinsic::x86_avx512_psra_q_512;
3655 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3656 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3657 : Intrinsic::x86_avx512_psra_w_512;
3658 else
3659 llvm_unreachable("Unexpected size");
3660 }
3662 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3663 } else if (Name.starts_with("avx512.mask.move.s")) {
3664 Rep = upgradeMaskedMove(Builder, *CI);
3665 } else if (Name.starts_with("avx512.cvtmask2")) {
3666 Rep = upgradeMaskToInt(Builder, *CI);
3667 } else if (Name.ends_with(".movntdqa")) {
3668 MDNode *Node = MDNode::get(
3669 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3671 LoadInst *LI = Builder.CreateAlignedLoad(
3672 CI->getType(), CI->getArgOperand(0),
3673 Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3674 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3675 Rep = LI;
3676 } else if (Name.starts_with("fma.vfmadd.") ||
3677 Name.starts_with("fma.vfmsub.") ||
3678 Name.starts_with("fma.vfnmadd.") ||
3679 Name.starts_with("fma.vfnmsub.")) {
3680 bool NegMul = Name[6] == 'n';
3681 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3682 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3684 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3685 CI->getArgOperand(2)};
3687 if (IsScalar) {
3688 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3689 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3690 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3691 }
3693 if (NegMul && !IsScalar)
3694 Ops[0] = Builder.CreateFNeg(Ops[0]);
3695 if (NegMul && IsScalar)
3696 Ops[1] = Builder.CreateFNeg(Ops[1]);
3697 if (NegAcc)
3698 Ops[2] = Builder.CreateFNeg(Ops[2]);
3700 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3702 if (IsScalar)
3703 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3704 } else if (Name.starts_with("fma4.vfmadd.s")) {
3705 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3706 CI->getArgOperand(2)};
3708 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3709 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3710 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3712 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3714 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3715 Rep, (uint64_t)0);
3716 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
3717 Name.starts_with("avx512.maskz.vfmadd.s") ||
3718 Name.starts_with("avx512.mask3.vfmadd.s") ||
3719 Name.starts_with("avx512.mask3.vfmsub.s") ||
3720 Name.starts_with("avx512.mask3.vfnmsub.s")) {
3721 bool IsMask3 = Name[11] == '3';
3722 bool IsMaskZ = Name[11] == 'z';
3723 // Drop the "avx512.mask." to make it easier.
3724 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3725 bool NegMul = Name[2] == 'n';
3726 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3728 Value *A = CI->getArgOperand(0);
3729 Value *B = CI->getArgOperand(1);
3730 Value *C = CI->getArgOperand(2);
3732 if (NegMul && (IsMask3 || IsMaskZ))
3733 A = Builder.CreateFNeg(A);
3734 if (NegMul && !(IsMask3 || IsMaskZ))
3735 B = Builder.CreateFNeg(B);
3736 if (NegAcc)
3737 C = Builder.CreateFNeg(C);
3739 A = Builder.CreateExtractElement(A, (uint64_t)0);
3740 B = Builder.CreateExtractElement(B, (uint64_t)0);
3741 C = Builder.CreateExtractElement(C, (uint64_t)0);
3743 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3744 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3745 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
3747 Intrinsic::ID IID;
3748 if (Name.back() == 'd')
3749 IID = Intrinsic::x86_avx512_vfmadd_f64;
3750 else
3751 IID = Intrinsic::x86_avx512_vfmadd_f32;
3752 Rep = Builder.CreateIntrinsic(IID, {}, Ops);
3753 } else {
3754 Rep = Builder.CreateIntrinsic(Intrinsic::fma, A->getType(), {A, B, C});
3755 }
3757 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
3758 : IsMask3 ? C
3759 : A;
3761 // For Mask3 with NegAcc, we need to create a new extractelement that
3762 // avoids the negation above.
3763 if (NegAcc && IsMask3)
3764 PassThru =
3765 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
3767 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
3768 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
3769 (uint64_t)0);
3770 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
3771 Name.starts_with("avx512.mask.vfnmadd.p") ||
3772 Name.starts_with("avx512.mask.vfnmsub.p") ||
3773 Name.starts_with("avx512.mask3.vfmadd.p") ||
3774 Name.starts_with("avx512.mask3.vfmsub.p") ||
3775 Name.starts_with("avx512.mask3.vfnmsub.p") ||
3776 Name.starts_with("avx512.maskz.vfmadd.p")) {
3777 bool IsMask3 = Name[11] == '3';
3778 bool IsMaskZ = Name[11] == 'z';
3779 // Drop the "avx512.mask." to make it easier.
3780 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3781 bool NegMul = Name[2] == 'n';
3782 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3784 Value *A = CI->getArgOperand(0);
3785 Value *B = CI->getArgOperand(1);
3786 Value *C = CI->getArgOperand(2);
3788 if (NegMul && (IsMask3 || IsMaskZ))
3789 A = Builder.CreateFNeg(A);
3790 if (NegMul && !(IsMask3 || IsMaskZ))
3791 B = Builder.CreateFNeg(B);
3792 if (NegAcc)
3793 C = Builder.CreateFNeg(C);
3795 if (CI->arg_size() == 5 &&
3796 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3797 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3798 Intrinsic::ID IID;
3799 // Check the character before ".512" in the string.
3800 if (Name[Name.size() - 5] == 's')
3801 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3802 else
3803 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3805 Rep = Builder.CreateIntrinsic(IID, {}, {A, B, C, CI->getArgOperand(4)});
3806 } else {
3807 Rep = Builder.CreateIntrinsic(Intrinsic::fma, A->getType(), {A, B, C});
3808 }
3810 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
3811 : IsMask3 ? CI->getArgOperand(2)
3812 : CI->getArgOperand(0);
3814 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3815 } else if (Name.starts_with("fma.vfmsubadd.p")) {
3816 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3817 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3818 Intrinsic::ID IID;
3819 if (VecWidth == 128 && EltWidth == 32)
3820 IID = Intrinsic::x86_fma_vfmaddsub_ps;
3821 else if (VecWidth == 256 && EltWidth == 32)
3822 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3823 else if (VecWidth == 128 && EltWidth == 64)
3824 IID = Intrinsic::x86_fma_vfmaddsub_pd;
3825 else if (VecWidth == 256 && EltWidth == 64)
3826 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3827 else
3828 llvm_unreachable("Unexpected intrinsic");
3830 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3831 CI->getArgOperand(2)};
3832 Ops[2] = Builder.CreateFNeg(Ops[2]);
3833 Rep = Builder.CreateIntrinsic(IID, {}, Ops);
3834 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
3835 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
3836 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
3837 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
3838 bool IsMask3 = Name[11] == '3';
3839 bool IsMaskZ = Name[11] == 'z';
3840 // Drop the "avx512.mask." to make it easier.
3841 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3842 bool IsSubAdd = Name[3] == 's';
3843 if (CI->arg_size() == 5) {
3844 Intrinsic::ID IID;
3845 // Check the character before ".512" in the string.
3846 if (Name[Name.size() - 5] == 's')
3847 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3848 else
3849 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3851 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3852 CI->getArgOperand(2), CI->getArgOperand(4)};
3853 if (IsSubAdd)
3854 Ops[2] = Builder.CreateFNeg(Ops[2]);
3856 Rep = Builder.CreateIntrinsic(IID, {}, Ops);
3857 } else {
3858 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3860 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3861 CI->getArgOperand(2)};
3863 Function *FMA = Intrinsic::getOrInsertDeclaration(
3864 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
3865 Value *Odd = Builder.CreateCall(FMA, Ops);
3866 Ops[2] = Builder.CreateFNeg(Ops[2]);
3867 Value *Even = Builder.CreateCall(FMA, Ops);
3869 if (IsSubAdd)
3870 std::swap(Even, Odd);
3872 SmallVector<int, 32> Idxs(NumElts);
3873 for (int i = 0; i != NumElts; ++i)
3874 Idxs[i] = i + (i % 2) * NumElts;
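// Illustrative example: for NumElts == 4 the mask is {0, 5, 2, 7}, taking
// even lanes from Even and odd lanes from Odd to form the alternating
// subtract/add result.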
3876 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3877 }
3879 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
3880 : IsMask3 ? CI->getArgOperand(2)
3881 : CI->getArgOperand(0);
3883 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3884 } else if (Name.starts_with("avx512.mask.pternlog.") ||
3885 Name.starts_with("avx512.maskz.pternlog.")) {
3886 bool ZeroMask = Name[11] == 'z';
3887 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3888 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3889 Intrinsic::ID IID;
3890 if (VecWidth == 128 && EltWidth == 32)
3891 IID = Intrinsic::x86_avx512_pternlog_d_128;
3892 else if (VecWidth == 256 && EltWidth == 32)
3893 IID = Intrinsic::x86_avx512_pternlog_d_256;
3894 else if (VecWidth == 512 && EltWidth == 32)
3895 IID = Intrinsic::x86_avx512_pternlog_d_512;
3896 else if (VecWidth == 128 && EltWidth == 64)
3897 IID = Intrinsic::x86_avx512_pternlog_q_128;
3898 else if (VecWidth == 256 && EltWidth == 64)
3899 IID = Intrinsic::x86_avx512_pternlog_q_256;
3900 else if (VecWidth == 512 && EltWidth == 64)
3901 IID = Intrinsic::x86_avx512_pternlog_q_512;
3902 else
3903 llvm_unreachable("Unexpected intrinsic");
3905 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3906 CI->getArgOperand(2), CI->getArgOperand(3)};
3907 Rep = Builder.CreateIntrinsic(IID, {}, Args);
3908 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3909 : CI->getArgOperand(0);
3910 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3911 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
3912 Name.starts_with("avx512.maskz.vpmadd52")) {
3913 bool ZeroMask = Name[11] == 'z';
3914 bool High = Name[20] == 'h' || Name[21] == 'h';
3915 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3916 Intrinsic::ID IID;
3917 if (VecWidth == 128 && !High)
3918 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3919 else if (VecWidth == 256 && !High)
3920 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3921 else if (VecWidth == 512 && !High)
3922 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3923 else if (VecWidth == 128 && High)
3924 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3925 else if (VecWidth == 256 && High)
3926 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3927 else if (VecWidth == 512 && High)
3928 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3929 else
3930 llvm_unreachable("Unexpected intrinsic");
3932 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3933 CI->getArgOperand(2)};
3934 Rep = Builder.CreateIntrinsic(IID, {}, Args);
3935 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3936 : CI->getArgOperand(0);
3937 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3938 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
3939 Name.starts_with("avx512.mask.vpermt2var.") ||
3940 Name.starts_with("avx512.maskz.vpermt2var.")) {
3941 bool ZeroMask = Name[11] == 'z';
3942 bool IndexForm = Name[17] == 'i';
3943 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3944 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
3945 Name.starts_with("avx512.maskz.vpdpbusd.") ||
3946 Name.starts_with("avx512.mask.vpdpbusds.") ||
3947 Name.starts_with("avx512.maskz.vpdpbusds.")) {
3948 bool ZeroMask = Name[11] == 'z';
3949 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3950 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3951 Intrinsic::ID IID;
3952 if (VecWidth == 128 && !IsSaturating)
3953 IID = Intrinsic::x86_avx512_vpdpbusd_128;
3954 else if (VecWidth == 256 && !IsSaturating)
3955 IID = Intrinsic::x86_avx512_vpdpbusd_256;
3956 else if (VecWidth == 512 && !IsSaturating)
3957 IID = Intrinsic::x86_avx512_vpdpbusd_512;
3958 else if (VecWidth == 128 && IsSaturating)
3959 IID = Intrinsic::x86_avx512_vpdpbusds_128;
3960 else if (VecWidth == 256 && IsSaturating)
3961 IID = Intrinsic::x86_avx512_vpdpbusds_256;
3962 else if (VecWidth == 512 && IsSaturating)
3963 IID = Intrinsic::x86_avx512_vpdpbusds_512;
3964 else
3965 llvm_unreachable("Unexpected intrinsic");
3967 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3968 CI->getArgOperand(2)};
3969 Rep = Builder.CreateIntrinsic(IID, {}, Args);
3970 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3971 : CI->getArgOperand(0);
3972 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3973 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
3974 Name.starts_with("avx512.maskz.vpdpwssd.") ||
3975 Name.starts_with("avx512.mask.vpdpwssds.") ||
3976 Name.starts_with("avx512.maskz.vpdpwssds.")) {
3977 bool ZeroMask = Name[11] == 'z';
3978 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3979 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3980 Intrinsic::ID IID;
3981 if (VecWidth == 128 && !IsSaturating)
3982 IID = Intrinsic::x86_avx512_vpdpwssd_128;
3983 else if (VecWidth == 256 && !IsSaturating)
3984 IID = Intrinsic::x86_avx512_vpdpwssd_256;
3985 else if (VecWidth == 512 && !IsSaturating)
3986 IID = Intrinsic::x86_avx512_vpdpwssd_512;
3987 else if (VecWidth == 128 && IsSaturating)
3988 IID = Intrinsic::x86_avx512_vpdpwssds_128;
3989 else if (VecWidth == 256 && IsSaturating)
3990 IID = Intrinsic::x86_avx512_vpdpwssds_256;
3991 else if (VecWidth == 512 && IsSaturating)
3992 IID = Intrinsic::x86_avx512_vpdpwssds_512;
3993 else
3994 llvm_unreachable("Unexpected intrinsic");
3996 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3997 CI->getArgOperand(2)};
3998 Rep = Builder.CreateIntrinsic(IID, {}, Args);
3999 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4000 : CI->getArgOperand(0);
4001 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4002 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4003 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4004 Name == "subborrow.u32" || Name == "subborrow.u64") {
4005 Intrinsic::ID IID;
4006 if (Name[0] == 'a' && Name.back() == '2')
4007 IID = Intrinsic::x86_addcarry_32;
4008 else if (Name[0] == 'a' && Name.back() == '4')
4009 IID = Intrinsic::x86_addcarry_64;
4010 else if (Name[0] == 's' && Name.back() == '2')
4011 IID = Intrinsic::x86_subborrow_32;
4012 else if (Name[0] == 's' && Name.back() == '4')
4013 IID = Intrinsic::x86_subborrow_64;
4014 else
4015 llvm_unreachable("Unexpected intrinsic");
4017 // Make a call with 3 operands.
4018 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4019 CI->getArgOperand(2)};
4020 Value *NewCall = Builder.CreateIntrinsic(IID, {}, Args);
4022 // Extract the second result and store it.
4023 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4024 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4025 // Replace the original call result with the first result of the new call.
4026 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4028 CI->replaceAllUsesWith(CF);
4029 Rep = nullptr;
4030 } else if (Name.starts_with("avx512.mask.") &&
4031 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4032 // Rep will be updated by the call in the condition.
4033 }
4035 return Rep;
4036 }
4038 static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4039 Function *F, IRBuilder<> &Builder) {
4040 if (Name.starts_with("neon.bfcvt")) {
4041 if (Name.starts_with("neon.bfcvtn2")) {
4042 SmallVector<int, 32> LoMask(4);
4043 std::iota(LoMask.begin(), LoMask.end(), 0);
4044 SmallVector<int, 32> ConcatMask(8);
4045 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4046 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4047 Value *Trunc =
4048 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4049 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4050 } else if (Name.starts_with("neon.bfcvtn")) {
4051 SmallVector<int, 32> ConcatMask(8);
4052 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4053 Type *V4BF16 =
4054 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4055 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4057 return Builder.CreateShuffleVector(
4058 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4059 } else {
4060 return Builder.CreateFPTrunc(CI->getOperand(0),
4061 Type::getBFloatTy(F->getContext()));
4062 }
4063 } else if (Name.starts_with("sve.fcvt")) {
4064 Intrinsic::ID NewID =
4065 StringSwitch<Intrinsic::ID>(Name)
4066 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4067 .Case("sve.fcvtnt.bf16f32",
4068 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4069 .Default(Intrinsic::not_intrinsic);
4070 if (NewID == Intrinsic::not_intrinsic)
4071 llvm_unreachable("Unhandled Intrinsic!");
4073 SmallVector<Value *, 3> Args(CI->args());
4075 // The original intrinsics incorrectly used a predicate based on the
4076 // smallest element type rather than the largest.
4077 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4078 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4080 if (Args[1]->getType() != BadPredTy)
4081 llvm_unreachable("Unexpected predicate type!");
4083 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4084 BadPredTy, Args[1]);
4085 Args[1] = Builder.CreateIntrinsic(
4086 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4088 return Builder.CreateIntrinsic(NewID, {}, Args, /*FMFSource=*/nullptr,
4089 CI->getName());
4090 }
4092 llvm_unreachable("Unhandled Intrinsic!");
4093 }
4095 static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4096 IRBuilder<> &Builder) {
4097 if (Name == "mve.vctp64.old") {
4098 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4099 // correct type.
4100 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4101 CI->getArgOperand(0),
4102 /*FMFSource=*/nullptr, CI->getName());
4103 Value *C1 = Builder.CreateIntrinsic(
4104 Intrinsic::arm_mve_pred_v2i,
4105 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4106 return Builder.CreateIntrinsic(
4107 Intrinsic::arm_mve_pred_i2v,
4108 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4109 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4110 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4111 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4112 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4113 Name ==
4114 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4115 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4116 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4117 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4118 Name ==
4119 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4120 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4121 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4122 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4123 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4124 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4125 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4126 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4127 std::vector<Type *> Tys;
4128 unsigned ID = CI->getIntrinsicID();
4129 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4130 switch (ID) {
4131 case Intrinsic::arm_mve_mull_int_predicated:
4132 case Intrinsic::arm_mve_vqdmull_predicated:
4133 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4134 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4135 break;
4136 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4137 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4138 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4139 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4140 V2I1Ty};
4141 break;
4142 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4143 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4144 CI->getOperand(1)->getType(), V2I1Ty};
4145 break;
4146 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4147 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4148 CI->getOperand(2)->getType(), V2I1Ty};
4149 break;
4150 case Intrinsic::arm_cde_vcx1q_predicated:
4151 case Intrinsic::arm_cde_vcx1qa_predicated:
4152 case Intrinsic::arm_cde_vcx2q_predicated:
4153 case Intrinsic::arm_cde_vcx2qa_predicated:
4154 case Intrinsic::arm_cde_vcx3q_predicated:
4155 case Intrinsic::arm_cde_vcx3qa_predicated:
4156 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4157 break;
4158 default:
4159 llvm_unreachable("Unhandled Intrinsic!");
4162 std::vector<Value *> Ops;
4163 for (Value *Op : CI->args()) {
4164 Type *Ty = Op->getType();
4165 if (Ty->getScalarSizeInBits() == 1) {
4166 Value *C1 = Builder.CreateIntrinsic(
4167 Intrinsic::arm_mve_pred_v2i,
4168 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4169 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4171 Ops.push_back(Op);
4174 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4175 CI->getName());
4177 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4180 // These are expected to have the arguments:
4181 // atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4183 // Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
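// A sketch of the intended rewrite (intrinsic mangling and value names
// illustrative):
//   %r = call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %p, float %v,
//                                            i32 0, i32 0, i1 false)
// becomes
//   %r = atomicrmw fadd ptr addrspace(3) %p, float %v syncscope("agent") seq_cst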
4185 static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4186 Function *F, IRBuilder<> &Builder) {
4187 AtomicRMWInst::BinOp RMWOp =
4188 StringSwitch<AtomicRMWInst::BinOp>(Name)
4189 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4190 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4191 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4192 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4193 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4194 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4195 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4196 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4197 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4198 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4199 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax);
4201 unsigned NumOperands = CI->getNumOperands();
4202 if (NumOperands < 3) // Malformed bitcode.
4203 return nullptr;
4205 Value *Ptr = CI->getArgOperand(0);
4206 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4207 if (!PtrTy) // Malformed.
4208 return nullptr;
4210 Value *Val = CI->getArgOperand(1);
4211 if (Val->getType() != CI->getType()) // Malformed.
4212 return nullptr;
4214 ConstantInt *OrderArg = nullptr;
4215 bool IsVolatile = false;
4217 // These should have 5 arguments (plus the callee). A separate version of the
4218 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4219 if (NumOperands > 3)
4220 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4222 // Ignore scope argument at 3
4224 if (NumOperands > 5) {
4225 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4226 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4229 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4230 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4231 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4232 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4233 Order = AtomicOrdering::SequentiallyConsistent;
4235 LLVMContext &Ctx = F->getContext();
4237 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4238 Type *RetTy = CI->getType();
4239 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4240 if (VT->getElementType()->isIntegerTy(16)) {
4241 VectorType *AsBF16 =
4242 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4243 Val = Builder.CreateBitCast(Val, AsBF16);
4247 // The scope argument never really worked correctly. Use agent as the most
4248 // conservative option which should still always produce the instruction.
4249 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4250 AtomicRMWInst *RMW =
4251 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4253 unsigned AddrSpace = PtrTy->getAddressSpace();
4254 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4255 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4256 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4257 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4258 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4261 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4262 MDBuilder MDB(F->getContext());
4263 MDNode *RangeNotPrivate =
4264 MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
4265 APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
4266 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4269 if (IsVolatile)
4270 RMW->setVolatile(true);
4272 return Builder.CreateBitCast(RMW, RetTy);
4275 /// Helper to unwrap intrinsic call MetadataAsValue operands.
4276 template <typename MDType>
4277 static MDType *unwrapMAVOp(CallBase *CI, unsigned Op) {
4278 if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4279 return dyn_cast<MDType>(MAV->getMetadata());
4280 return nullptr;
4283 /// Convert debug intrinsic calls to non-instruction debug records.
4284 /// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4285 /// \p CI - The debug intrinsic call.
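/// For example (a sketch; metadata numbering illustrative):
///   call void @llvm.dbg.value(metadata i32 %v, metadata !10,
///                             metadata !DIExpression())
/// becomes the non-instruction record
///   #dbg_value(i32 %v, !10, !DIExpression(), !dbg)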
4286 static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4287 DbgRecord *DR = nullptr;
4288 if (Name == "label") {
4289 DR = new DbgLabelRecord(unwrapMAVOp<DILabel>(CI, 0), CI->getDebugLoc());
4290 } else if (Name == "assign") {
4291 DR = new DbgVariableRecord(
4292 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
4293 unwrapMAVOp<DIExpression>(CI, 2), unwrapMAVOp<DIAssignID>(CI, 3),
4294 unwrapMAVOp<Metadata>(CI, 4), unwrapMAVOp<DIExpression>(CI, 5),
4295 CI->getDebugLoc());
4296 } else if (Name == "declare") {
4297 DR = new DbgVariableRecord(
4298 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
4299 unwrapMAVOp<DIExpression>(CI, 2), CI->getDebugLoc(),
4300 DbgVariableRecord::LocationType::Declare);
4301 } else if (Name == "addr") {
4302 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4303 DIExpression *Expr = unwrapMAVOp<DIExpression>(CI, 2);
4304 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4305 DR = new DbgVariableRecord(unwrapMAVOp<Metadata>(CI, 0),
4306 unwrapMAVOp<DILocalVariable>(CI, 1), Expr,
4307 CI->getDebugLoc());
4308 } else if (Name == "value") {
4309 // An old version of dbg.value had an extra offset argument.
4310 unsigned VarOp = 1;
4311 unsigned ExprOp = 2;
4312 if (CI->arg_size() == 4) {
4313 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4314 // Nonzero offset dbg.values get dropped without a replacement.
4315 if (!Offset || !Offset->isZeroValue())
4316 return;
4317 VarOp = 2;
4318 ExprOp = 3;
4320 DR = new DbgVariableRecord(
4321 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, VarOp),
4322 unwrapMAVOp<DIExpression>(CI, ExprOp), CI->getDebugLoc());
4324 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4325 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4328 /// Upgrade a call to an old intrinsic. All argument and return casting must be
4329 /// provided to seamlessly integrate with existing context.
4330 void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4331 // Note that dyn_cast to Function is not quite the same as getCalledFunction,
4332 // which checks that the callee's function type matches. It's likely we need
4333 // to handle type changes here.
4334 Function *F = dyn_cast<Function>(CI->getCalledOperand());
4335 if (!F)
4336 return;
4338 LLVMContext &C = CI->getContext();
4339 IRBuilder<> Builder(C);
4340 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4342 if (!NewFn) {
4343 bool FallthroughToDefaultUpgrade = false;
4344 // Get the Function's name.
4345 StringRef Name = F->getName();
4347 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4348 Name = Name.substr(5);
4350 bool IsX86 = Name.consume_front("x86.");
4351 bool IsNVVM = Name.consume_front("nvvm.");
4352 bool IsAArch64 = Name.consume_front("aarch64.");
4353 bool IsARM = Name.consume_front("arm.");
4354 bool IsAMDGCN = Name.consume_front("amdgcn.");
4355 bool IsDbg = Name.consume_front("dbg.");
4356 Value *Rep = nullptr;
4358 if (!IsX86 && Name == "stackprotectorcheck") {
4359 Rep = nullptr;
4360 } else if (IsNVVM) {
4361 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4362 } else if (IsX86) {
4363 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4364 } else if (IsAArch64) {
4365 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4366 } else if (IsARM) {
4367 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4368 } else if (IsAMDGCN) {
4369 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4370 } else if (IsDbg) {
4371 // We might have decided we don't want the new format after all between
4372 // first requesting the upgrade and now; skip the conversion if that is
4373 // the case, and check here to see if the intrinsic needs to be upgraded
4374 // normally.
4375 if (!CI->getModule()->IsNewDbgInfoFormat) {
4376 bool NeedsUpgrade =
4377 upgradeIntrinsicFunction1(CI->getCalledFunction(), NewFn, false);
4378 if (!NeedsUpgrade)
4379 return;
4380 FallthroughToDefaultUpgrade = true;
4381 } else {
4382 upgradeDbgIntrinsicToDbgRecord(Name, CI);
4384 } else {
4385 llvm_unreachable("Unknown function for CallBase upgrade.");
4388 if (!FallthroughToDefaultUpgrade) {
4389 if (Rep)
4390 CI->replaceAllUsesWith(Rep);
4391 CI->eraseFromParent();
4392 return;
4396 const auto &DefaultCase = [&]() -> void {
4397 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4398 // Handle generic mangling change.
4399 assert(
4400 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4401 "Unknown function for CallBase upgrade and isn't just a name change");
4402 CI->setCalledFunction(NewFn);
4403 return;
4406 // This must be an upgrade from a named to a literal struct.
4407 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4408 assert(OldST != NewFn->getReturnType() &&
4409 "Return type must have changed");
4410 assert(OldST->getNumElements() ==
4411 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4412 "Must have same number of elements");
4414 SmallVector<Value *> Args(CI->args());
4415 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4416 NewCI->setAttributes(CI->getAttributes());
4417 Value *Res = PoisonValue::get(OldST);
4418 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4419 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4420 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4422 CI->replaceAllUsesWith(Res);
4423 CI->eraseFromParent();
4424 return;
4427 // We're probably about to produce something invalid. Let the verifier catch
4428 // it instead of dying here.
4429 CI->setCalledOperand(
4430 ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4431 return;
4433 CallInst *NewCall = nullptr;
4434 switch (NewFn->getIntrinsicID()) {
4435 default: {
4436 DefaultCase();
4437 return;
4439 case Intrinsic::arm_neon_vst1:
4440 case Intrinsic::arm_neon_vst2:
4441 case Intrinsic::arm_neon_vst3:
4442 case Intrinsic::arm_neon_vst4:
4443 case Intrinsic::arm_neon_vst2lane:
4444 case Intrinsic::arm_neon_vst3lane:
4445 case Intrinsic::arm_neon_vst4lane: {
4446 SmallVector<Value *, 4> Args(CI->args());
4447 NewCall = Builder.CreateCall(NewFn, Args);
4448 break;
4450 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4451 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4452 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4453 LLVMContext &Ctx = F->getParent()->getContext();
4454 SmallVector<Value *, 4> Args(CI->args());
4455 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4456 cast<ConstantInt>(Args[3])->getZExtValue());
4457 NewCall = Builder.CreateCall(NewFn, Args);
4458 break;
4460 case Intrinsic::aarch64_sve_ld3_sret:
4461 case Intrinsic::aarch64_sve_ld4_sret:
4462 case Intrinsic::aarch64_sve_ld2_sret: {
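    // The old ldN intrinsics returned one wide scalable vector; the new *_sret
    // forms return a struct of N subvectors. Rebuild the wide value by
    // inserting each struct element at its offset, e.g. (sketch):
    //   {<vscale x 4 x i32>, <vscale x 4 x i32>} -> <vscale x 8 x i32>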
4463 StringRef Name = F->getName();
4464 Name = Name.substr(5);
4465 unsigned N = StringSwitch<unsigned>(Name)
4466 .StartsWith("aarch64.sve.ld2", 2)
4467 .StartsWith("aarch64.sve.ld3", 3)
4468 .StartsWith("aarch64.sve.ld4", 4)
4469 .Default(0);
4470 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4471 unsigned MinElts = RetTy->getMinNumElements() / N;
4472 SmallVector<Value *, 2> Args(CI->args());
4473 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4474 Value *Ret = llvm::PoisonValue::get(RetTy);
4475 for (unsigned I = 0; I < N; I++) {
4476 Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4477 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4478 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
4480 NewCall = dyn_cast<CallInst>(Ret);
4481 break;
4484 case Intrinsic::coro_end: {
4485 SmallVector<Value *, 3> Args(CI->args());
4486 Args.push_back(ConstantTokenNone::get(CI->getContext()));
4487 NewCall = Builder.CreateCall(NewFn, Args);
4488 break;
4491 case Intrinsic::vector_extract: {
4492 StringRef Name = F->getName();
4493 Name = Name.substr(5); // Strip llvm
4494 if (!Name.starts_with("aarch64.sve.tuple.get")) {
4495 DefaultCase();
4496 return;
4498 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4499 unsigned MinElts = RetTy->getMinNumElements();
4500 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4501 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4502 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4503 break;
4506 case Intrinsic::vector_insert: {
4507 StringRef Name = F->getName();
4508 Name = Name.substr(5);
4509 if (!Name.starts_with("aarch64.sve.tuple")) {
4510 DefaultCase();
4511 return;
4513 if (Name.starts_with("aarch64.sve.tuple.set")) {
4514 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4515 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4516 Value *NewIdx =
4517 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4518 NewCall = Builder.CreateCall(
4519 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4520 break;
4522 if (Name.starts_with("aarch64.sve.tuple.create")) {
4523 unsigned N = StringSwitch<unsigned>(Name)
4524 .StartsWith("aarch64.sve.tuple.create2", 2)
4525 .StartsWith("aarch64.sve.tuple.create3", 3)
4526 .StartsWith("aarch64.sve.tuple.create4", 4)
4527 .Default(0);
4528 assert(N > 1 && "Create is expected to be between 2-4");
4529 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4530 Value *Ret = llvm::PoisonValue::get(RetTy);
4531 unsigned MinElts = RetTy->getMinNumElements() / N;
4532 for (unsigned I = 0; I < N; I++) {
4533 Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4534 Value *V = CI->getArgOperand(I);
4535 Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
4537 NewCall = dyn_cast<CallInst>(Ret);
4539 break;
4542 case Intrinsic::arm_neon_bfdot:
4543 case Intrinsic::arm_neon_bfmmla:
4544 case Intrinsic::arm_neon_bfmlalb:
4545 case Intrinsic::arm_neon_bfmlalt:
4546 case Intrinsic::aarch64_neon_bfdot:
4547 case Intrinsic::aarch64_neon_bfmmla:
4548 case Intrinsic::aarch64_neon_bfmlalb:
4549 case Intrinsic::aarch64_neon_bfmlalt: {
4550 SmallVector<Value *, 3> Args;
4551 assert(CI->arg_size() == 3 &&
4552 "Mismatch between function args and call args");
4553 size_t OperandWidth =
4554 CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
4555 assert((OperandWidth == 64 || OperandWidth == 128) &&
4556 "Unexpected operand width");
4557 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4558 auto Iter = CI->args().begin();
4559 Args.push_back(*Iter++);
4560 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4561 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4562 NewCall = Builder.CreateCall(NewFn, Args);
4563 break;
4566 case Intrinsic::bitreverse:
4567 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4568 break;
4570 case Intrinsic::ctlz:
4571 case Intrinsic::cttz:
4572 assert(CI->arg_size() == 1 &&
4573 "Mismatch between function args and call args");
4574 NewCall =
4575 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4576 break;
4578 case Intrinsic::objectsize: {
4579 Value *NullIsUnknownSize =
4580 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4581 Value *Dynamic =
4582 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4583 NewCall = Builder.CreateCall(
4584 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4585 break;
4588 case Intrinsic::ctpop:
4589 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4590 break;
4592 case Intrinsic::convert_from_fp16:
4593 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4594 break;
4596 case Intrinsic::dbg_value: {
4597 StringRef Name = F->getName();
4598 Name = Name.substr(5); // Strip llvm.
4599 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4600 if (Name.starts_with("dbg.addr")) {
4601 DIExpression *Expr = cast<DIExpression>(
4602 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4603 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4604 NewCall =
4605 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4606 MetadataAsValue::get(C, Expr)});
4607 break;
4610 // Upgrade from the old version that had an extra offset argument.
4611 assert(CI->arg_size() == 4);
4612 // Drop nonzero offsets instead of attempting to upgrade them.
4613 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4614 if (Offset->isZeroValue()) {
4615 NewCall = Builder.CreateCall(
4616 NewFn,
4617 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4618 break;
4620 CI->eraseFromParent();
4621 return;
4624 case Intrinsic::ptr_annotation:
4625 // Upgrade from versions that lacked the annotation attribute argument.
4626 if (CI->arg_size() != 4) {
4627 DefaultCase();
4628 return;
4631 // Create a new call with an added null annotation attribute argument.
4632 NewCall =
4633 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4634 CI->getArgOperand(2), CI->getArgOperand(3),
4635 Constant::getNullValue(Builder.getPtrTy())});
4636 NewCall->takeName(CI);
4637 CI->replaceAllUsesWith(NewCall);
4638 CI->eraseFromParent();
4639 return;
4641 case Intrinsic::var_annotation:
4642 // Upgrade from versions that lacked the annotation attribute argument.
4643 if (CI->arg_size() != 4) {
4644 DefaultCase();
4645 return;
4647 // Create a new call with an added null annotation attribute argument.
4648 NewCall =
4649 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4650 CI->getArgOperand(2), CI->getArgOperand(3),
4651 Constant::getNullValue(Builder.getPtrTy())});
4652 NewCall->takeName(CI);
4653 CI->replaceAllUsesWith(NewCall);
4654 CI->eraseFromParent();
4655 return;
4657 case Intrinsic::riscv_aes32dsi:
4658 case Intrinsic::riscv_aes32dsmi:
4659 case Intrinsic::riscv_aes32esi:
4660 case Intrinsic::riscv_aes32esmi:
4661 case Intrinsic::riscv_sm4ks:
4662 case Intrinsic::riscv_sm4ed: {
4663 // The last argument to these intrinsics used to be i8 and changed to i32.
4664 // The type overload for sm4ks and sm4ed was removed.
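    // e.g. on RV64 (a sketch; the old mangled name is illustrative):
    //   %r = call i64 @llvm.riscv.sm4ks.i64(i64 %a, i64 %b, i8 2)
    // becomes
    //   %a32 = trunc i64 %a to i32
    //   %b32 = trunc i64 %b to i32
    //   %r32 = call i32 @llvm.riscv.sm4ks(i32 %a32, i32 %b32, i32 2)
    //   %r = sext i32 %r32 to i64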
4665 Value *Arg2 = CI->getArgOperand(2);
4666 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4667 return;
4669 Value *Arg0 = CI->getArgOperand(0);
4670 Value *Arg1 = CI->getArgOperand(1);
4671 if (CI->getType()->isIntegerTy(64)) {
4672 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4673 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4676 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4677 cast<ConstantInt>(Arg2)->getZExtValue());
4679 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4680 Value *Res = NewCall;
4681 if (Res->getType() != CI->getType())
4682 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4683 NewCall->takeName(CI);
4684 CI->replaceAllUsesWith(Res);
4685 CI->eraseFromParent();
4686 return;
4688 case Intrinsic::riscv_sha256sig0:
4689 case Intrinsic::riscv_sha256sig1:
4690 case Intrinsic::riscv_sha256sum0:
4691 case Intrinsic::riscv_sha256sum1:
4692 case Intrinsic::riscv_sm3p0:
4693 case Intrinsic::riscv_sm3p1: {
4694 // These intrinsics used to be overloaded on the XLen type (i64 on RV64); the
4695 // type overload was removed and they now always operate on i32.
4696 if (!CI->getType()->isIntegerTy(64))
4697 return;
4699 Value *Arg =
4700 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
4702 NewCall = Builder.CreateCall(NewFn, Arg);
4703 Value *Res =
4704 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4705 NewCall->takeName(CI);
4706 CI->replaceAllUsesWith(Res);
4707 CI->eraseFromParent();
4708 return;
4711 case Intrinsic::x86_xop_vfrcz_ss:
4712 case Intrinsic::x86_xop_vfrcz_sd:
4713 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
4714 break;
4716 case Intrinsic::x86_xop_vpermil2pd:
4717 case Intrinsic::x86_xop_vpermil2ps:
4718 case Intrinsic::x86_xop_vpermil2pd_256:
4719 case Intrinsic::x86_xop_vpermil2ps_256: {
4720 SmallVector<Value *, 4> Args(CI->args());
4721 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
4722 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
4723 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
4724 NewCall = Builder.CreateCall(NewFn, Args);
4725 break;
4728 case Intrinsic::x86_sse41_ptestc:
4729 case Intrinsic::x86_sse41_ptestz:
4730 case Intrinsic::x86_sse41_ptestnzc: {
4731 // The arguments for these intrinsics used to be v4f32, and changed
4732 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
4733 // So, the only thing required is a bitcast for both arguments.
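    // e.g. (sketch):
    //   call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %b)
    // becomes
    //   %a64 = bitcast <4 x float> %a to <2 x i64>
    //   %b64 = bitcast <4 x float> %b to <2 x i64>
    //   call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a64, <2 x i64> %b64)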
4734 // First, check the arguments have the old type.
4735 Value *Arg0 = CI->getArgOperand(0);
4736 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
4737 return;
4739 // Old intrinsic, add bitcasts
4740 Value *Arg1 = CI->getArgOperand(1);
4742 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
4744 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
4745 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
4747 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
4748 break;
4751 case Intrinsic::x86_rdtscp: {
4752 // This used to take 1 argument. If we have no arguments, it is already
4753 // upgraded.
4754 if (CI->getNumOperands() == 0)
4755 return;
4757 NewCall = Builder.CreateCall(NewFn);
4758 // Extract the second result and store it.
4759 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4760 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
4761 // Replace the original call result with the first result of the new call.
4762 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
4764 NewCall->takeName(CI);
4765 CI->replaceAllUsesWith(TSC);
4766 CI->eraseFromParent();
4767 return;
4770 case Intrinsic::x86_sse41_insertps:
4771 case Intrinsic::x86_sse41_dppd:
4772 case Intrinsic::x86_sse41_dpps:
4773 case Intrinsic::x86_sse41_mpsadbw:
4774 case Intrinsic::x86_avx_dp_ps_256:
4775 case Intrinsic::x86_avx2_mpsadbw: {
4776 // Need to truncate the last argument from i32 to i8 -- this argument models
4777 // an inherently 8-bit immediate operand to these x86 instructions.
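    // e.g. (sketch) a trailing "i32 7" immediate operand becomes "i8 7" via a
    // trunc instruction.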
4778 SmallVector<Value *, 4> Args(CI->args());
4780 // Replace the last argument with a trunc.
4781 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
4782 NewCall = Builder.CreateCall(NewFn, Args);
4783 break;
4786 case Intrinsic::x86_avx512_mask_cmp_pd_128:
4787 case Intrinsic::x86_avx512_mask_cmp_pd_256:
4788 case Intrinsic::x86_avx512_mask_cmp_pd_512:
4789 case Intrinsic::x86_avx512_mask_cmp_ps_128:
4790 case Intrinsic::x86_avx512_mask_cmp_ps_256:
4791 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
4792 SmallVector<Value *, 4> Args(CI->args());
4793 unsigned NumElts =
4794 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
4795 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
4797 NewCall = Builder.CreateCall(NewFn, Args);
4798 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
4800 NewCall->takeName(CI);
4801 CI->replaceAllUsesWith(Res);
4802 CI->eraseFromParent();
4803 return;
4806 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
4807 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
4808 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
4809 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
4810 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
4811 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
4812 SmallVector<Value *, 4> Args(CI->args());
4813 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4814 if (NewFn->getIntrinsicID() ==
4815 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
4816 Args[1] = Builder.CreateBitCast(
4817 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4819 NewCall = Builder.CreateCall(NewFn, Args);
4820 Value *Res = Builder.CreateBitCast(
4821 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
4823 NewCall->takeName(CI);
4824 CI->replaceAllUsesWith(Res);
4825 CI->eraseFromParent();
4826 return;
4828 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
4829 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
4830 case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
4831 SmallVector<Value *, 4> Args(CI->args());
4832 unsigned NumElts =
4833 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
4834 Args[1] = Builder.CreateBitCast(
4835 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4836 Args[2] = Builder.CreateBitCast(
4837 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4839 NewCall = Builder.CreateCall(NewFn, Args);
4840 break;
4843 case Intrinsic::thread_pointer: {
4844 NewCall = Builder.CreateCall(NewFn, {});
4845 break;
4848 case Intrinsic::memcpy:
4849 case Intrinsic::memmove:
4850 case Intrinsic::memset: {
4851 // We have to make sure that the call signature is what we're expecting.
4852 // We only want to change the old signatures by removing the alignment arg:
4853 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
4854 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
4855 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4856 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
4857 // Note: i8*'s in the above can be any pointer type
4858 if (CI->arg_size() != 5) {
4859 DefaultCase();
4860 return;
4862 // Remove alignment argument (3), and add alignment attributes to the
4863 // dest/src pointers.
4864 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4865 CI->getArgOperand(2), CI->getArgOperand(4)};
4866 NewCall = Builder.CreateCall(NewFn, Args);
4867 AttributeList OldAttrs = CI->getAttributes();
4868 AttributeList NewAttrs = AttributeList::get(
4869 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
4870 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
4871 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
4872 NewCall->setAttributes(NewAttrs);
4873 auto *MemCI = cast<MemIntrinsic>(NewCall);
4874 // All mem intrinsics support dest alignment.
4875 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4876 MemCI->setDestAlignment(Align->getMaybeAlignValue());
4877 // Memcpy/Memmove also support source alignment.
4878 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4879 MTI->setSourceAlignment(Align->getMaybeAlignValue());
4880 break;
4883 assert(NewCall && "Should have either set this variable or returned through "
4884 "the default case");
4885 NewCall->takeName(CI);
4886 CI->replaceAllUsesWith(NewCall);
4887 CI->eraseFromParent();
4890 void llvm::UpgradeCallsToIntrinsic(Function *F) {
4891 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4893 // Check if this function should be upgraded and get the replacement function
4894 // if there is one.
4895 Function *NewFn;
4896 if (UpgradeIntrinsicFunction(F, NewFn)) {
4897 // Replace all users of the old function with the new function or new
4898 // instructions. This is not a range loop because the call is deleted.
4899 for (User *U : make_early_inc_range(F->users()))
4900 if (CallBase *CB = dyn_cast<CallBase>(U))
4901 UpgradeIntrinsicCall(CB, NewFn);
4903 // Remove old function, no longer used, from the module.
4904 F->eraseFromParent();
4908 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
4909 const unsigned NumOperands = MD.getNumOperands();
4910 if (NumOperands == 0)
4911 return &MD; // Invalid, punt to a verifier error.
4913 // Check if the tag uses struct-path aware TBAA format.
4914 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
4915 return &MD;
4917 auto &Context = MD.getContext();
4918 if (NumOperands == 3) {
4919 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4920 MDNode *ScalarType = MDNode::get(Context, Elts);
4921 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
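    // e.g. (sketch; metadata numbering illustrative):
    //   !1 = !{!"int", !0, i64 1}
    // becomes
    //   !2 = !{!"int", !0}
    //   !3 = !{!2, !2, i64 0, i64 1}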
4922 Metadata *Elts2[] = {ScalarType, ScalarType,
4923 ConstantAsMetadata::get(
4924 Constant::getNullValue(Type::getInt64Ty(Context))),
4925 MD.getOperand(2)};
4926 return MDNode::get(Context, Elts2);
4928 // Create a MDNode <MD, MD, offset 0>
4929 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
4930 Type::getInt64Ty(Context)))};
4931 return MDNode::get(Context, Elts);
4934 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
4935 Instruction *&Temp) {
4936 if (Opc != Instruction::BitCast)
4937 return nullptr;
4939 Temp = nullptr;
4940 Type *SrcTy = V->getType();
4941 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4942 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4943 LLVMContext &Context = V->getContext();
4945 // We have no information about target data layout, so we assume that
4946 // the maximum pointer size is 64 bits.
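    // e.g. (sketch) "bitcast ptr addrspace(1) %p to ptr addrspace(2)" becomes
    //   %i = ptrtoint ptr addrspace(1) %p to i64
    //   %q = inttoptr i64 %i to ptr addrspace(2)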
4947 Type *MidTy = Type::getInt64Ty(Context);
4948 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4950 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4953 return nullptr;
4956 Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4957 if (Opc != Instruction::BitCast)
4958 return nullptr;
4960 Type *SrcTy = C->getType();
4961 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4962 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4963 LLVMContext &Context = C->getContext();
4965 // We have no information about target data layout, so we assume that
4966 // the maximum pointer size is 64 bits.
4967 Type *MidTy = Type::getInt64Ty(Context);
4969 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
4970 DestTy);
4973 return nullptr;
4976 /// Check the debug info version number; if it is outdated, drop the debug
4977 /// info. Return true if the module is modified.
4978 bool llvm::UpgradeDebugInfo(Module &M) {
4979 if (DisableAutoUpgradeDebugInfo)
4980 return false;
4982 // We need to get metadata before the module is verified (i.e., getModuleFlag
4983 // makes assumptions that we haven't verified yet). Carefully extract the flag
4984 // from the metadata.
4985 unsigned Version = 0;
4986 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
4987 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
4988 if (Flag->getNumOperands() < 3)
4989 return false;
4990 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
4991 return K->getString() == "Debug Info Version";
4992 return false;
4994 if (OpIt != ModFlags->op_end()) {
4995 const MDOperand &ValOp = (*OpIt)->getOperand(2);
4996 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
4997 Version = CI->getZExtValue();
5001 if (Version == DEBUG_METADATA_VERSION) {
5002 bool BrokenDebugInfo = false;
5003 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5004 report_fatal_error("Broken module found, compilation aborted!");
5005 if (!BrokenDebugInfo)
5006 // Everything is ok.
5007 return false;
5008 else {
5009 // Diagnose malformed debug info.
5010 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
5011 M.getContext().diagnose(Diag);
5014 bool Modified = StripDebugInfo(M);
5015 if (Modified && Version != DEBUG_METADATA_VERSION) {
5016 // Diagnose a version mismatch.
5017 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
5018 M.getContext().diagnose(DiagVersion);
5020 return Modified;
5023 static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
5024 const Metadata *V) {
5025 if (K == "kernel") {
5026 if (!mdconst::extract<ConstantInt>(V)->isZero())
5027 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5028 return true;
5030 if (K == "align") {
5031 // V is a bitfield specifying two 16-bit values. The alignment value is
5032 // specified in the low 16 bits, and the index is specified in the high bits.
5033 // For the index, 0 indicates the return value while higher values correspond
5034 // to each parameter (idx = param + 1).
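    // e.g. (sketch) a packed value of 0x00020008 encodes Idx = 2 (i.e.
    // parameter 1) with a stack alignment of 8.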
5035 const uint64_t AlignIdxValuePair =
5036 mdconst::extract<ConstantInt>(V)->getZExtValue();
5037 const unsigned Idx = (AlignIdxValuePair >> 16);
5038 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5039 // For now, skip adding the stackalign attribute for the return value (TODO).
5040 if (!Idx)
5041 return false;
5042 cast<Function>(GV)->addAttributeAtIndex(
5043 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5044 return true;
5047 return false;
5050 void llvm::UpgradeNVVMAnnotations(Module &M) {
5051 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5052 if (!NamedMD)
5053 return;
5055 SmallVector<MDNode *, 8> NewNodes;
5056 SmallSet<const MDNode *, 8> SeenNodes;
5057 for (MDNode *MD : NamedMD->operands()) {
5058 if (!SeenNodes.insert(MD).second)
5059 continue;
5061 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
5062 if (!GV)
5063 continue;
5065 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5067 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
5068 // Each nvvm.annotations metadata entry will be of the following form:
5069 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
5070 // start index = 1, to skip the global variable key
5071 // increment = 2, to skip the value of each property-value pair
5072 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5073 MDString *K = cast<MDString>(MD->getOperand(j));
5074 const MDOperand &V = MD->getOperand(j + 1);
5075 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
5076 if (!Upgraded)
5077 NewOperands.append({K, V});
5080 if (NewOperands.size() > 1)
5081 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
5084 NamedMD->clearOperands();
5085 for (MDNode *N : NewNodes)
5086 NamedMD->addOperand(N);
5089 /// This checks for the objc retain/release marker which should be upgraded.
5090 /// It returns true if the module is modified.
5091 static bool upgradeRetainReleaseMarker(Module &M) {
5092 bool Changed = false;
5093 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5094 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5095 if (ModRetainReleaseMarker) {
5096 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5097 if (Op) {
5098 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5099 if (ID) {
5100 SmallVector<StringRef, 4> ValueComp;
5101 ID->getString().split(ValueComp, "#");
5102 if (ValueComp.size() == 2) {
5103 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5104 ID = MDString::get(M.getContext(), NewValue);
5106 M.addModuleFlag(Module::Error, MarkerKey, ID);
5107 M.eraseNamedMetadata(ModRetainReleaseMarker);
5108 Changed = true;
5112 return Changed;
5115 void llvm::UpgradeARCRuntime(Module &M) {
5116 // This lambda converts normal function calls to ARC runtime functions to
5117 // intrinsic calls.
5118 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5119 llvm::Intrinsic::ID IntrinsicFunc) {
5120 Function *Fn = M.getFunction(OldFunc);
5122 if (!Fn)
5123 return;
5125 Function *NewFn =
5126 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5128 for (User *U : make_early_inc_range(Fn->users())) {
5129 CallInst *CI = dyn_cast<CallInst>(U);
5130 if (!CI || CI->getCalledFunction() != Fn)
5131 continue;
5133 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5134 FunctionType *NewFuncTy = NewFn->getFunctionType();
5135 SmallVector<Value *, 2> Args;
5137 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5138 // value to the return type of the old function.
5139 if (NewFuncTy->getReturnType() != CI->getType() &&
5140 !CastInst::castIsValid(Instruction::BitCast, CI,
5141 NewFuncTy->getReturnType()))
5142 continue;
5144 bool InvalidCast = false;
5146 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5147 Value *Arg = CI->getArgOperand(I);
5149 // Bitcast argument to the parameter type of the new function if it's
5150 // not a variadic argument.
5151 if (I < NewFuncTy->getNumParams()) {
5152 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
5153 // to the parameter type of the new function.
5154 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
5155 NewFuncTy->getParamType(I))) {
5156 InvalidCast = true;
5157 break;
5159 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5161 Args.push_back(Arg);
5164 if (InvalidCast)
5165 continue;
5167 // Create a call instruction that calls the new function.
5168 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5169 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5170 NewCall->takeName(CI);
5172 // Bitcast the return value back to the type of the old call.
5173 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5175 if (!CI->use_empty())
5176 CI->replaceAllUsesWith(NewRetVal);
5177 CI->eraseFromParent();
5180 if (Fn->use_empty())
5181 Fn->eraseFromParent();
5184 // Unconditionally convert a call to "clang.arc.use" to a call to
5185 // "llvm.objc.clang.arc.use".
5186 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5188 // Upgrade the retain release marker. If there is no need to upgrade
5189 // the marker, that means either the module is already new enough to contain
5190 // new intrinsics or it is not ARC. There is no need to upgrade the runtime calls.
5191 if (!upgradeRetainReleaseMarker(M))
5192 return;
5194 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5195 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5196 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5197 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5198 {"objc_autoreleaseReturnValue",
5199 llvm::Intrinsic::objc_autoreleaseReturnValue},
5200 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5201 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5202 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5203 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5204 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5205 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5206 {"objc_release", llvm::Intrinsic::objc_release},
5207 {"objc_retain", llvm::Intrinsic::objc_retain},
5208 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5209 {"objc_retainAutoreleaseReturnValue",
5210 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5211 {"objc_retainAutoreleasedReturnValue",
5212 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5213 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5214 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5215 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5216 {"objc_unsafeClaimAutoreleasedReturnValue",
5217 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5218 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5219 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5220 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5221 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5222 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5223 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5224 {"objc_arc_annotation_topdown_bbstart",
5225 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5226 {"objc_arc_annotation_topdown_bbend",
5227 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5228 {"objc_arc_annotation_bottomup_bbstart",
5229 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5230 {"objc_arc_annotation_bottomup_bbend",
5231 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5233 for (auto &I : RuntimeFuncs)
5234 UpgradeToIntrinsic(I.first, I.second);
5237 bool llvm::UpgradeModuleFlags(Module &M) {
5238 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5239 if (!ModFlags)
5240 return false;
5242 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5243 bool HasSwiftVersionFlag = false;
5244 uint8_t SwiftMajorVersion, SwiftMinorVersion;
5245 uint32_t SwiftABIVersion;
5246 auto Int8Ty = Type::getInt8Ty(M.getContext());
5247 auto Int32Ty = Type::getInt32Ty(M.getContext());
5249 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5250 MDNode *Op = ModFlags->getOperand(I);
5251 if (Op->getNumOperands() != 3)
5252 continue;
5253 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
5254 if (!ID)
5255 continue;
5256 auto SetBehavior = [&](Module::ModFlagBehavior B) {
5257 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
5258 Type::getInt32Ty(M.getContext()), B)),
5259 MDString::get(M.getContext(), ID->getString()),
5260 Op->getOperand(2)};
5261 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5262 Changed = true;
5265 if (ID->getString() == "Objective-C Image Info Version")
5266 HasObjCFlag = true;
5267 if (ID->getString() == "Objective-C Class Properties")
5268 HasClassProperties = true;
5269 // Upgrade PIC from Error/Max to Min.
5270 if (ID->getString() == "PIC Level") {
5271 if (auto *Behavior =
5272 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5273 uint64_t V = Behavior->getLimitedValue();
5274 if (V == Module::Error || V == Module::Max)
5275 SetBehavior(Module::Min);
5278 // Upgrade "PIE Level" from Error to Max.
5279 if (ID->getString() == "PIE Level")
5280 if (auto *Behavior =
5281 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5282 if (Behavior->getLimitedValue() == Module::Error)
5283 SetBehavior(Module::Max);
5285 // Upgrade branch protection and return address signing module flags. The
5286 // module flag behavior for these fields was Error and is now Min.
5287 if (ID->getString() == "branch-target-enforcement" ||
5288 ID->getString().starts_with("sign-return-address")) {
5289 if (auto *Behavior =
5290 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5291 if (Behavior->getLimitedValue() == Module::Error) {
5292 Type *Int32Ty = Type::getInt32Ty(M.getContext());
5293 Metadata *Ops[3] = {
5294 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5295 Op->getOperand(1), Op->getOperand(2)};
5296 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5297 Changed = true;
5302 // Upgrade Objective-C Image Info Section. Remove the whitespace in the
5303 // section name so that llvm-lto will not complain about mismatching
5304 // module flags that are functionally the same.
5305 if (ID->getString() == "Objective-C Image Info Section") {
5306 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5307 SmallVector<StringRef, 4> ValueComp;
5308 Value->getString().split(ValueComp, " ");
5309 if (ValueComp.size() != 1) {
5310 std::string NewValue;
5311 for (auto &S : ValueComp)
5312 NewValue += S.str();
5313 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5314 MDString::get(M.getContext(), NewValue)};
5315 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5316 Changed = true;
5321 // The IR upgrader turns an i32-typed "Objective-C Garbage Collection" flag into
5322 // an i8 value. If the higher bits are set, it adds new module flags for Swift info.
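    // e.g. (sketch) a value of 0x05040200 yields Swift major version 5, minor
    // version 4, ABI version 2, and a low GC byte of 0x00.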
5323 if (ID->getString() == "Objective-C Garbage Collection") {
5324 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5325 if (Md) {
5326 assert(Md->getValue() && "Expected non-empty metadata");
5327 auto Type = Md->getValue()->getType();
5328 if (Type == Int8Ty)
5329 continue;
5330 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5331 if ((Val & 0xff) != Val) {
5332 HasSwiftVersionFlag = true;
5333 SwiftABIVersion = (Val & 0xff00) >> 8;
5334 SwiftMajorVersion = (Val & 0xff000000) >> 24;
5335 SwiftMinorVersion = (Val & 0xff0000) >> 16;
5337 Metadata *Ops[3] = {
5338 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Error)),
5339 Op->getOperand(1),
5340 ConstantAsMetadata::get(ConstantInt::get(Int8Ty, Val & 0xff))};
5341 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5342 Changed = true;
5346 if (ID->getString() == "amdgpu_code_object_version") {
5347 Metadata *Ops[3] = {
5348 Op->getOperand(0),
5349 MDString::get(M.getContext(), "amdhsa_code_object_version"),
5350 Op->getOperand(2)};
5351 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5352 Changed = true;
5356 // "Objective-C Class Properties" is recently added for Objective-C. We
5357 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
5358 // flag of value 0, so we can correclty downgrade this flag when trying to
5359 // link an ObjC bitcode without this module flag with an ObjC bitcode with
5360 // this module flag.
5361 if (HasObjCFlag && !HasClassProperties) {
5362 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5363 (uint32_t)0);
5364 Changed = true;
5367 if (HasSwiftVersionFlag) {
5368 M.addModuleFlag(Module::Error, "Swift ABI Version",
5369 SwiftABIVersion);
5370 M.addModuleFlag(Module::Error, "Swift Major Version",
5371 ConstantInt::get(Int8Ty, SwiftMajorVersion));
5372 M.addModuleFlag(Module::Error, "Swift Minor Version",
5373 ConstantInt::get(Int8Ty, SwiftMinorVersion));
5374 Changed = true;
5377 return Changed;
5380 void llvm::UpgradeSectionAttributes(Module &M) {
5381 auto TrimSpaces = [](StringRef Section) -> std::string {
5382 SmallVector<StringRef, 5> Components;
5383 Section.split(Components, ',');
5385 SmallString<32> Buffer;
5386 raw_svector_ostream OS(Buffer);
5388 for (auto Component : Components)
5389 OS << ',' << Component.trim();
5391 return std::string(OS.str().substr(1));
5394 for (auto &GV : M.globals()) {
5395 if (!GV.hasSection())
5396 continue;
5398 StringRef Section = GV.getSection();
5400 if (!Section.starts_with("__DATA, __objc_catlist"))
5401 continue;
5403 // __DATA, __objc_catlist, regular, no_dead_strip
5404 // __DATA,__objc_catlist,regular,no_dead_strip
5405 GV.setSection(TrimSpaces(Section));
5409 namespace {
5410 // Prior to LLVM 10.0, the strictfp attribute could be used on individual
5411 // callsites within a function that did not also have the strictfp attribute.
5412 // Since 10.0, if strict FP semantics are needed within a function, the
5413 // function must have the strictfp attribute and all calls within the function
5414 // must also have the strictfp attribute. This latter restriction is
5415 // necessary to prevent unwanted libcall simplification when a function is
5416 // being cloned (such as for inlining).
5418 // The "dangling" strictfp attribute usage was only used to prevent constant
5419 // folding and other libcall simplification. The nobuiltin attribute on the
5420 // callsite has the same effect.
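// e.g. (sketch) in a caller without the strictfp attribute, a callsite like
//   %r = call double @sin(double %x) #0    ; #0 = { strictfp }
// is rewritten so that the callsite carries nobuiltin instead of strictfp.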
5421 struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5422 StrictFPUpgradeVisitor() = default;
5424 void visitCallBase(CallBase &Call) {
5425 if (!Call.isStrictFP())
5426 return;
5427 if (isa<ConstrainedFPIntrinsic>(&Call))
5428 return;
5429 // If we get here, the caller doesn't have the strictfp attribute
5430 // but this callsite does. Replace the strictfp attribute with nobuiltin.
5431 Call.removeFnAttr(Attribute::StrictFP);
5432 Call.addFnAttr(Attribute::NoBuiltin);
5436 /// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
5437 struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
5438 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
5439 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
5441 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
5442 if (!RMW.isFloatingPointOperation())
5443 return;
5445 MDNode *Empty = MDNode::get(RMW.getContext(), {});
5446 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
5447 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
5448 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
5451 } // namespace
5453 void llvm::UpgradeFunctionAttributes(Function &F) {
5454 // If a function definition doesn't have the strictfp attribute,
5455 // convert any callsite strictfp attributes to nobuiltin.
5456 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5457 StrictFPUpgradeVisitor SFPV;
5458 SFPV.visit(F);
5461 // Remove all incompatible attributes from the function.
5462 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
5463 F.getReturnType(), F.getAttributes().getRetAttrs()));
5464 for (auto &Arg : F.args())
5465 Arg.removeAttrs(
5466 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
5468 // Older versions of LLVM treated an "implicit-section-name" attribute
5469 // similarly to directly setting the section on a Function.
5470 if (Attribute A = F.getFnAttribute("implicit-section-name");
5471 A.isValid() && A.isStringAttribute()) {
5472 F.setSection(A.getValueAsString());
5473 F.removeFnAttr("implicit-section-name");
5476 if (!F.empty()) {
5477 // For some reason this is called twice, and the first time is before any
5478 // instructions are loaded into the body.
5480 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
5481 A.isValid()) {
5483 if (A.getValueAsBool()) {
5484 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
5485 Visitor.visit(F);
5488 // We will leave behind dead attribute uses on external declarations, but
5489 // clang never added these to declarations anyway.
5490 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
5495 static bool isOldLoopArgument(Metadata *MD) {
5496 auto *T = dyn_cast_or_null<MDTuple>(MD);
5497 if (!T)
5498 return false;
5499 if (T->getNumOperands() < 1)
5500 return false;
5501 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
5502 if (!S)
5503 return false;
5504 return S->getString().starts_with("llvm.vectorizer.");
5507 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
5508 StringRef OldPrefix = "llvm.vectorizer.";
5509 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5511 if (OldTag == "llvm.vectorizer.unroll")
5512 return MDString::get(C, "llvm.loop.interleave.count");
5514 return MDString::get(
5515 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
5516 .str());
5519 static Metadata *upgradeLoopArgument(Metadata *MD) {
5520 auto *T = dyn_cast_or_null<MDTuple>(MD);
5521 if (!T)
5522 return MD;
5523 if (T->getNumOperands() < 1)
5524 return MD;
5525 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
5526 if (!OldTag)
5527 return MD;
5528 if (!OldTag->getString().starts_with("llvm.vectorizer."))
5529 return MD;
5531 // This has an old tag. Upgrade it.
5532 SmallVector<Metadata *, 8> Ops;
5533 Ops.reserve(T->getNumOperands());
5534 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
5535 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5536 Ops.push_back(T->getOperand(I));
5538 return MDTuple::get(T->getContext(), Ops);
5541 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
5542 auto *T = dyn_cast<MDTuple>(&N);
5543 if (!T)
5544 return &N;
5546 if (none_of(T->operands(), isOldLoopArgument))
5547 return &N;
5549 SmallVector<Metadata *, 8> Ops;
5550 Ops.reserve(T->getNumOperands());
5551 for (Metadata *MD : T->operands())
5552 Ops.push_back(upgradeLoopArgument(MD));
5554 return MDTuple::get(T->getContext(), Ops);
5557 std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
5558 Triple T(TT);
5559 // The only data layout upgrade needed for pre-GCN, SPIR or SPIRV targets is
5560 // setting the address space of globals to 1. This does not apply to SPIRV Logical.
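// e.g. (sketch) "e-p:32:32" becomes "e-p:32:32-G1", and an empty data layout
// becomes just "G1".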
5561 if (((T.isAMDGPU() && !T.isAMDGCN()) ||
5562 (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
5563 !DL.contains("-G") && !DL.starts_with("G")) {
5564 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
5567 if (T.isLoongArch64() || T.isRISCV64()) {
5568 // Make i32 a native type for 64-bit LoongArch and RISC-V.
5569 auto I = DL.find("-n64-");
5570 if (I != StringRef::npos)
5571 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
5572 return DL.str();
5575 std::string Res = DL.str();
5576 // AMDGCN data layout upgrades.
5577 if (T.isAMDGCN()) {
5578 // Define address spaces for constants.
5579 if (!DL.contains("-G") && !DL.starts_with("G"))
5580 Res.append(Res.empty() ? "G1" : "-G1");
5582 // Add missing non-integral declarations.
5583 // This goes before adding new address spaces to prevent incoherent string
5584 // values.
5585 if (!DL.contains("-ni") && !DL.starts_with("ni"))
5586 Res.append("-ni:7:8:9");
5587 // Update ni:7 to ni:7:8:9.
5588 if (DL.ends_with("ni:7"))
5589 Res.append(":8:9");
5590 if (DL.ends_with("ni:7:8"))
5591 Res.append(":9");
5593 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
5594 // resources). An empty data layout has already been upgraded to G1 by now.
5595 if (!DL.contains("-p7") && !DL.starts_with("p7"))
5596 Res.append("-p7:160:256:256:32");
5597 if (!DL.contains("-p8") && !DL.starts_with("p8"))
5598 Res.append("-p8:128:128");
5599 if (!DL.contains("-p9") && !DL.starts_with("p9"))
5600 Res.append("-p9:192:256:256:32");
5602 return Res;
5605 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
5606 // If the datalayout matches the expected format, add pointer size address
5607 // spaces to the datalayout.
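    // e.g. (sketch) "e-m:e-p:32:32-i64:64" becomes
    // "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64".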
5608 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
5609 if (!DL.contains(AddrSpaces)) {
5610 SmallVector<StringRef, 4> Groups;
5611 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
5612 if (R.match(Res, &Groups))
5613 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
5617 // AArch64 data layout upgrades.
5618 if (T.isAArch64()) {
5619 // Add "-Fn32"
5620 if (!DL.empty() && !DL.contains("-Fn32"))
5621 Res.append("-Fn32");
5622 AddPtr32Ptr64AddrSpaces();
5623 return Res;
5626 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
5627 T.isWasm()) {
5628 // Mips64 with o32 ABI did not add "-i128:128".
5629 // Add "-i128:128"
5630 std::string I64 = "-i64:64";
5631 std::string I128 = "-i128:128";
5632 if (!StringRef(Res).contains(I128)) {
5633 size_t Pos = Res.find(I64);
5634 if (Pos != size_t(-1))
5635 Res.insert(Pos + I64.size(), I128);
5637 return Res;
5640 if (!T.isX86())
5641 return Res;
5643 AddPtr32Ptr64AddrSpaces();
5645 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
5646 // for i128 operations prior to this being reflected in the data layout, and
5647 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
5648 // boundaries, so although this is a breaking change, the upgrade is expected
5649 // to fix more IR than it breaks.
5650 // Intel MCU is an exception and uses 4-byte alignment.
5651 if (!T.isOSIAMCU()) {
5652 std::string I128 = "-i128:128";
5653 if (StringRef Ref = Res; !Ref.contains(I128)) {
5654 SmallVector<StringRef, 4> Groups;
5655 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
5656 if (R.match(Res, &Groups))
5657 Res = (Groups[1] + I128 + Groups[3]).str();
5661 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
5662 // Raising the alignment is safe because Clang did not produce f80 values in
5663 // the MSVC environment before this upgrade was added.
5664 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
5665 StringRef Ref = Res;
5666 auto I = Ref.find("-f80:32-");
5667 if (I != StringRef::npos)
5668 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
5671 return Res;
5674 void llvm::UpgradeAttributes(AttrBuilder &B) {
5675 StringRef FramePointer;
5676 Attribute A = B.getAttribute("no-frame-pointer-elim");
5677 if (A.isValid()) {
5678 // The value can be "true" or "false".
5679 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
5680 B.removeAttribute("no-frame-pointer-elim");
5682 if (B.contains("no-frame-pointer-elim-non-leaf")) {
5683 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
5684 if (FramePointer != "all")
5685 FramePointer = "non-leaf";
5686 B.removeAttribute("no-frame-pointer-elim-non-leaf");
5688 if (!FramePointer.empty())
5689 B.addAttribute("frame-pointer", FramePointer);
5691 A = B.getAttribute("null-pointer-is-valid");
5692 if (A.isValid()) {
5693 // The value can be "true" or "false".
5694 bool NullPointerIsValid = A.getValueAsString() == "true";
5695 B.removeAttribute("null-pointer-is-valid");
5696 if (NullPointerIsValid)
5697 B.addAttribute(Attribute::NullPointerIsValid);
5701 void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
5702 // clang.arc.attachedcall bundles are now required to have an operand.
5703 // If they don't, it's okay to drop them entirely: when there is an operand,
5704 // the "attachedcall" is meaningful and required, but without an operand,
5705 // it's just a marker NOP. Dropping it merely prevents an optimization.
5706 erase_if(Bundles, [&](OperandBundleDef &OBD) {
5707 return OBD.getTag() == "clang.arc.attachedcall" &&
5708 OBD.inputs().empty();