//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/BinaryFormat/Dwarf.h"
19 #include "llvm/IR/AttributeMask.h"
20 #include "llvm/IR/CallingConv.h"
21 #include "llvm/IR/Constants.h"
22 #include "llvm/IR/DebugInfo.h"
23 #include "llvm/IR/DebugInfoMetadata.h"
24 #include "llvm/IR/DiagnosticInfo.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/IR/InstVisitor.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/IntrinsicInst.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/IntrinsicsAArch64.h"
32 #include "llvm/IR/IntrinsicsARM.h"
33 #include "llvm/IR/IntrinsicsNVPTX.h"
34 #include "llvm/IR/IntrinsicsRISCV.h"
35 #include "llvm/IR/IntrinsicsWebAssembly.h"
36 #include "llvm/IR/IntrinsicsX86.h"
37 #include "llvm/IR/LLVMContext.h"
38 #include "llvm/IR/MDBuilder.h"
39 #include "llvm/IR/Metadata.h"
40 #include "llvm/IR/Module.h"
41 #include "llvm/IR/Value.h"
42 #include "llvm/IR/Verifier.h"
43 #include "llvm/Support/AMDGPUAddrSpace.h"
44 #include "llvm/Support/CommandLine.h"
45 #include "llvm/Support/ErrorHandling.h"
46 #include "llvm/Support/Regex.h"
47 #include "llvm/TargetParser/Triple.h"
using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));
static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
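
// Illustrative example (not from the original source): an old declaration
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// is renamed to "llvm.x86.sse41.ptestc.old" and a fresh declaration taking
// <2 x i64> operands is inserted; the call sites themselves are rewritten
// later in UpgradeIntrinsicCall.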
// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
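
// Illustrative example (not from the original source): the old form
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
// is mapped to the current declaration whose immediate operand is an i8;
// UpgradeIntrinsicCall truncates the constant when it rewrites each call.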
// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
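
// Illustrative example (not from the original source): the avx512bf16
// intrinsics originally modelled bfloat16 values with i16 elements, e.g.
//   declare <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float>, <4 x float>)
// The two helpers above detect declarations still using integer element types
// and install replacements that use bfloat elements instead.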
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsics matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.mask.cmp.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
             .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
             .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
             .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
             .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
             .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MaskedFPCompare(F, ID, NewFn);
    return false; // No other 'x86.avx512.mask.cmp.*'.
  }

  if (Name.consume_front("avx512bf16.")) {
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}
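
// Illustrative example (not from the original source): a module declaring
//   declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>,
//                                                 <2 x double>, i8)
// still uses a floating-point selector; the code above installs the current
// declaration with an integer selector and UpgradeIntrinsicCall bitcasts the
// old operand when rewriting calls.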
// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::thread_pointer);
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("v2f32.v8i8", "v4f32.v16i8",
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to Aarch64 Neon or Arm Neon.
  }
  // Continue on to Arm or Aarch64.
  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }
    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
               Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
      if (Name.starts_with("bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }
      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}
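
// Illustrative example (not from the original source): a retired tuple helper
// such as llvm.aarch64.sve.tuple.get is redirected to llvm.vector.extract with
// the matching overload types, so the operation becomes a target-independent
// subvector extract instead of an AArch64-specific intrinsic.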
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("abs."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_abs_bf16)
        .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}
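
// Illustrative example (not from the original source): llvm.nvvm.neg.bf16 was
// once declared with an i16 surrogate type. When a declaration's return type
// is not yet bfloat, the mapping above flags it for upgrade and the call sites
// are rebuilt against the bfloat form in UpgradeIntrinsicCall.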
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec")) {
          // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
          // there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
          Name.consume_front("flat.atomic.")) {
        if (Name.starts_with("fadd") ||
            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
          // declaration.
          NewFn = nullptr;
          return true;
        }
      }

      if (Name.starts_with("ldexp.")) {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords &&
          F->getParent()->IsNewDbgInfoFormat) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              // Skip over extract.last.active, otherwise it will be 'upgraded'
              // to a regular vector extract which is a different operation.
              .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("splice.", Intrinsic::vector_splice)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                    {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
    if (Name.consume_front("experimental.stepvector.")) {
      Intrinsic::ID ID = Intrinsic::stepvector;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), ID, F->getFunctionType()->getReturnType());
      return true;
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'm': {
    // Updating the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to embedding the alignment as an attribute of
    // the pointer args.
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn =
            Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::memset, ParamTypes);
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.consume_front("nvvm.")) {
      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
      if (F->arg_size() == 1) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases("brev32", "brev64", Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                    {F->getReturnType()});
          return true;
        }
      }

      // Check for nvvm intrinsics that need a return type adjustment.
      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = nullptr;
          return true;
        }
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = false;
      if (Name.consume_front("abs."))
        // nvvm.abs.{i,ll}
        Expand = Name == "i" || Name == "ll";
      else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
        Expand = true;
      else if (Name.consume_front("max.") || Name.consume_front("min."))
        // nvvm.{min,max}.{i,ii,ui,ull}
        Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                 Name == "ui" || Name == "ull";
      else if (Name.consume_front("atomic.load.add."))
        // nvvm.atomic.load.add.{f32.p,f64.p}
        Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
      else if (Name.consume_front("bitcast."))
        // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
        Expand =
            Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
      else if (Name.consume_front("rotate."))
        // nvvm.rotate.{b32,b64,right.b64}
        Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
      else if (Name.consume_front("ptr.gen.to."))
        // nvvm.ptr.gen.to.{local,shared,global,constant}
        Expand = Name.starts_with("local") || Name.starts_with("shared") ||
                 Name.starts_with("global") || Name.starts_with("constant");
      else if (Name.consume_front("ptr."))
        // nvvm.ptr.{local,shared,global,constant}.to.gen
        Expand =
            (Name.consume_front("local") || Name.consume_front("shared") ||
             Name.consume_front("global") || Name.consume_front("constant")) &&
            Name.starts_with(".to.gen");
      else if (Name.consume_front("ldg.global."))
        // nvvm.ldg.global.{i,p,f}
        Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
                  Name.starts_with("p."));
      else
        Expand = false;

      if (Expand) {
        NewFn = nullptr;
        return true;
      }
      break; // No other 'nvvm.*'.
    }
    break;
  }
  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (Name.starts_with("objectsize.")) {
      Type *Tys[2] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->arg_size() == 2 || F->arg_size() == 3 ||
          F->getName() !=
              Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;

  case 'p':
    if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::ptr_annotation,
          {F->arg_begin()->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;

  case 'r': {
    if (Name.consume_front("riscv.")) {
      Intrinsic::ID ID;
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
               .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
               .Case("aes32esi", Intrinsic::riscv_aes32esi)
               .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
               .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
            F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
               .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
               .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
               .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
               .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
               .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }
      break; // No other 'riscv.*' intrinsics
    }
    break;
  }

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'v': {
    if (Name == "var.annotation" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::var_annotation,
          {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
      return true;
    }
    break;
  }

  case 'w':
    if (Name.consume_front("wasm.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
              .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
              .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->getReturnType());
        return true;
      }

      if (Name.consume_front("dot.i8x16.i7x16.")) {
        ID = StringSwitch<Intrinsic::ID>(Name)
                 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
                 .Case("add.signed",
                       Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
                 .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other 'wasm.dot.i8x16.i7x16.*'.
      }
      break; // No other 'wasm.*'.
    }
    break;

  case 'x':
    if (upgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }

  auto *ST = dyn_cast<StructType>(F->getReturnType());
  if (ST && (!ST->isLiteral() || ST->isPacked()) &&
      F->getIntrinsicID() != Intrinsic::not_intrinsic) {
    // Replace return type with literal non-packed struct. Only do this for
    // intrinsics declared to return a struct, not for intrinsics with
    // overloaded return type, in which case the exact struct type will be
    // mangled into the name.
    SmallVector<Intrinsic::IITDescriptor> Desc;
    Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
    if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
      auto *FT = F->getFunctionType();
      auto *NewST = StructType::get(ST->getContext(), ST->elements());
      auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
      std::string Name = F->getName().str();
      rename(F);
      NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
                               Name, F->getParent());

      // The new function may also need remangling.
      if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
        NewFn = *Result;
      return true;
    }
  }

  // Remangle our intrinsic since we upgrade the mangling
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != std::nullopt) {
    NewFn = *Result;
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}
bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
                                    bool CanUpgradeDebugIntrinsicsToRecords) {
  NewFn = nullptr;
  bool Upgraded =
      upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}
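
// Illustrative usage sketch (an assumption about the typical driver, mirroring
// how the bitcode reader consumes this API):
//   Function *NewFn;
//   if (UpgradeIntrinsicFunction(&F, NewFn))
//     UpgradeCallsToIntrinsic(&F);
// UpgradeCallsToIntrinsic then rewrites every call site and removes the old
// declaration once NewFn (or an inline expansion) has replaced it.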
GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
                          GV->getName() == "llvm.global_dtors")) ||
      !GV->hasInitializer())
    return nullptr;
  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
  if (!ATy)
    return nullptr;
  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
  if (!STy || STy->getNumElements() != 2)
    return nullptr;

  LLVMContext &C = GV->getContext();
  IRBuilder<> IRB(C);
  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
                               IRB.getPtrTy());
  Constant *Init = GV->getInitializer();
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
    auto Ctor = cast<Constant>(Init->getOperand(i));
    NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
                                      Ctor->getAggregateElement(1),
                                      Constant::getNullValue(IRB.getPtrTy()));
  }
  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);

  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
                            NewInit, GV->getName());
}
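
// Illustrative example (not from the original source): an old-style entry
//   @llvm.global_ctors = appending global [1 x { i32, ptr }] [...]
// is rebuilt with the three-field element type { i32, ptr, ptr }, appending a
// null "associated data" pointer to each constructor record.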
// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
  llvm::VectorType *MaskTy = FixedVectorType::get(
      Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
  // i8 and we need to extract down to the right number of elements.
  if (NumElts <= 4) {
    int Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
                                       "extract");
  }

  return Mask;
}
static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
                            Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getX86MaskVec(Builder, Mask,
                       cast<FixedVectorType>(Op0->getType())->getNumElements());
  return Builder.CreateSelect(Mask, Op0, Op1);
}
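// Emit a select between the low elements of Op0 and Op1 based on bit 0 of a
// scalar mask; used when upgrading masked scalar (ss/sd) intrinsics.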
static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
                                  Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
                                      Mask->getType()->getIntegerBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);
  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
  return Builder.CreateSelect(Mask, Op0, Op1);
}
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the
// immediate, so we need to handle both cases. VALIGN also doesn't have
// 128-bit lanes.
static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  int Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that.
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16;      // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(
      Op1, Op0, ArrayRef(Indices, NumElts), "palignr");

  return emitX86Select(Builder, Mask, Align, Passthru);
}
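// Upgrade masked VPERMT2/VPERMI2 variable-permute intrinsics to the unmasked
// vpermi2var intrinsics followed by a select on the mask operand.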
static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *Ty = CI.getType();
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  else
    llvm_unreachable("Unexpected intrinsic");

  Value *Args[] = { CI.getArgOperand(0), CI.getArgOperand(1),
                    CI.getArgOperand(2) };

  // If this isn't index form we need to swap operand 0 and 1.
  if (!IndexForm)
    std::swap(Args[0], Args[1]);

  Value *V = Builder.CreateIntrinsic(IID, {}, Args);
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
                             : Builder.CreateBitCast(CI.getArgOperand(1),
                                                     Ty);
  return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}
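// Upgrade a simple (optionally masked) two-operand intrinsic to a generic
// intrinsic (e.g. smax/umin/sadd.sat) plus a select when a mask is present.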
static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
                                         Intrinsic::ID IID) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getOperand(0);
  Value *Op1 = CI.getOperand(1);
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});

  if (CI.arg_size() == 4) { // For masked intrinsics.
    Value *VecSrc = CI.getOperand(2);
    Value *Mask = CI.getOperand(3);
    Res = emitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}
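// Upgrade X86 rotate intrinsics to the generic funnel-shift intrinsics
// (fshl/fshr with both vector operands equal to the source).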
static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
                               bool IsRotateRight) {
  Type *Ty = CI.getType();
  Value *Src = CI.getArgOperand(0);
  Value *Amt = CI.getArgOperand(1);

  // Amount may be scalar immediate, in which case create a splat vector.
  // Funnel shift amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  }

  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});

  if (CI.arg_size() == 4) { // For masked intrinsics.
    Value *VecSrc = CI.getOperand(2);
    Value *Mask = CI.getOperand(3);
    Res = emitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}
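// Upgrade XOP vpcom/vpcomu intrinsics to an integer compare plus a sign
// extension; Imm selects the predicate and IsSigned the signedness.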
static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
                              bool IsSigned) {
  Type *Ty = CI.getType();
  Value *LHS = CI.getArgOperand(0);
  Value *RHS = CI.getArgOperand(1);

  CmpInst::Predicate Pred;
  switch (Imm) {
  case 0x0:
    Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
    break;
  case 0x1:
    Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
    break;
  case 0x2:
    Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
    break;
  case 0x3:
    Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
    break;
  case 0x4:
    Pred = ICmpInst::ICMP_EQ;
    break;
  case 0x5:
    Pred = ICmpInst::ICMP_NE;
    break;
  case 0x6:
    return Constant::getNullValue(Ty); // FALSE
  case 0x7:
    return Constant::getAllOnesValue(Ty); // TRUE
  default:
    llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
  }

  Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
  Value *Ext = Builder.CreateSExt(Cmp, Ty);
  return Ext;
}
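// Upgrade VPSHLD/VPSHRD concat-shift intrinsics to the generic funnel-shift
// intrinsics, handling the optional merge/zero masking forms.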
static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
                                    bool IsShiftRight, bool ZeroMask) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Amt = CI.getArgOperand(2);

  if (IsShiftRight)
    std::swap(Op0, Op1);

  // Amount may be scalar immediate, in which case create a splat vector.
  // Funnel shift amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  }

  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});

  unsigned NumArgs = CI.arg_size();
  if (NumArgs >= 4) { // For masked intrinsics.
    Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
                    ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
                                   CI.getArgOperand(0);
    Value *Mask = CI.getOperand(NumArgs - 1);
    Res = emitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}
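// Upgrade AVX-512 masked store intrinsics to the generic llvm.masked.store,
// or to a plain store when the mask is known to be all ones.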
static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
                                 Value *Mask, bool Aligned) {
  const Align Alignment =
      Aligned
          ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
          : Align(1);

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Data, Ptr, Alignment);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
}
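// Upgrade AVX-512 masked load intrinsics to the generic llvm.masked.load,
// or to a plain load when the mask is known to be all ones.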
static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
                                Value *Passthru, Value *Mask, bool Aligned) {
  Type *ValTy = Passthru->getType();
  const Align Alignment =
      Aligned
          ? Align(
                Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
                8)
          : Align(1);

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
}
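// Upgrade pabs intrinsics to the generic llvm.abs intrinsic plus an optional
// select for the masked forms.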
static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getArgOperand(0);
  Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
                                       {Op0, Builder.getInt1(false)});
  if (CI.arg_size() == 3)
    Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
  return Res;
}
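// Upgrade pmuldq/pmuludq intrinsics to a plain 64-bit multiply after sign- or
// zero-extending the low 32 bits of each 64-bit lane.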
static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
  Type *Ty = CI.getType();

  // Arguments have a vXi32 type so cast to vXi64.
  Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
  Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);

  if (IsSigned) {
    // Shift left then arithmetic shift right.
    Constant *ShiftAmt = ConstantInt::get(Ty, 32);
    LHS = Builder.CreateShl(LHS, ShiftAmt);
    LHS = Builder.CreateAShr(LHS, ShiftAmt);
    RHS = Builder.CreateShl(RHS, ShiftAmt);
    RHS = Builder.CreateAShr(RHS, ShiftAmt);
  } else {
    // Clear the upper bits.
    Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
    LHS = Builder.CreateAnd(LHS, Mask);
    RHS = Builder.CreateAnd(RHS, Mask);
  }

  Value *Res = Builder.CreateMul(LHS, RHS);

  if (CI.arg_size() == 4)
    Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));

  return Res;
}
// Apply a mask to a vector of i1's and make sure the result is at least
// 8 bits wide.
static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask) {
  unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
  if (Mask) {
    const auto *C = dyn_cast<Constant>(Mask);
    if (!C || !C->isAllOnesValue())
      Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  }

  if (NumElts < 8) {
    int Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(Vec,
                                      Constant::getNullValue(Vec->getType()),
                                      Indices);
  }
  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
                                   unsigned CC, bool Signed) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();

  Value *Cmp;
  if (CC == 3) {
    Cmp = Constant::getNullValue(
        FixedVectorType::get(Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    Cmp = Constant::getAllOnesValue(
        FixedVectorType::get(Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ; break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE; break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
  }

  Value *Mask = CI.getArgOperand(CI.arg_size() - 1);

  return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
}
// Replace a masked intrinsic with an older unmasked intrinsic.
static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
                                    Intrinsic::ID IID) {
  Value *Rep = Builder.CreateIntrinsic(
      IID, {}, {CI.getArgOperand(0), CI.getArgOperand(1)});
  return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
}
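// Upgrade masked scalar move intrinsics: select between the low elements of
// B and Src based on bit 0 of the mask and insert the result into A.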
static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
  Value *A = CI.getArgOperand(0);
  Value *B = CI.getArgOperand(1);
  Value *Src = CI.getArgOperand(2);
  Value *Mask = CI.getArgOperand(3);

  Value *AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
  Value *Cmp = Builder.CreateIsNotNull(AndNode);
  Value *Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
  Value *Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
  Value *Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
}
static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
  Value *Op = CI.getArgOperand(0);
  Type *ReturnOp = CI.getType();
  unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
}
// Replace intrinsic with unmasked version and a select.
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
                                      CallBase &CI, Value *&Rep) {
  Name = Name.substr(12); // Remove avx512.mask.

  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (Name.starts_with("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.starts_with("permvar.")) {
    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("conflict.")) {
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pavg.")) {
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else
    return false;

  SmallVector<Value *, 4> Args(CI.args());
  Args.pop_back();
  Args.pop_back();
  Rep = Builder.CreateIntrinsic(IID, {}, Args);
  unsigned NumArgs = CI.arg_size();
  Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
                      CI.getArgOperand(NumArgs - 2));
  return true;
}
/// Upgrade comment in call to inline asm that represents an objc retain
/// release marker.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  size_t Pos;
  if (AsmStr->find("mov\tfp") == 0 &&
      AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
      (Pos = AsmStr->find("# marker")) != std::string::npos) {
    AsmStr->replace(Pos, 1, ";");
  }
}
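// Upgrade legacy NVVM intrinsic calls (abs, max/min, clz, popc, rotate,
// address-space conversions, ldg, and bf16 math) to generic LLVM IR or to
// the current NVVM intrinsics.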
static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
                                       Function *F, IRBuilder<> &Builder) {
  Value *Rep = nullptr;

  if (Name == "abs.i" || Name == "abs.ll") {
    Value *Arg = CI->getArgOperand(0);
    Value *Neg = Builder.CreateNeg(Arg, "neg");
    Value *Cmp = Builder.CreateICmpSGE(
        Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
    Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
  } else if (Name.starts_with("atomic.load.add.f32.p") ||
             Name.starts_with("atomic.load.add.f64.p")) {
    Value *Ptr = CI->getArgOperand(0);
    Value *Val = CI->getArgOperand(1);
    Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
                                  AtomicOrdering::SequentiallyConsistent);
  } else if (Name.consume_front("max.") &&
             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
              Name == "ui" || Name == "ull")) {
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    Value *Cmp = Name.starts_with("u")
                     ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
                     : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
  } else if (Name.consume_front("min.") &&
             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
              Name == "ui" || Name == "ull")) {
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    Value *Cmp = Name.starts_with("u")
                     ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
                     : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
  } else if (Name == "clz.ll") {
    // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
    Value *Arg = CI->getArgOperand(0);
    Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
                                          {Arg, Builder.getFalse()},
                                          /*FMFSource=*/nullptr, "ctlz");
    Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
  } else if (Name == "popc.ll") {
    // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
    // i64.
    Value *Arg = CI->getArgOperand(0);
    Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
                                          Arg, /*FMFSource=*/nullptr, "ctpop");
    Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
  } else if (Name == "h2f") {
    Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
                                  {Builder.getFloatTy()}, CI->getArgOperand(0),
                                  /*FMFSource=*/nullptr, "h2f");
  } else if (Name.consume_front("bitcast.") &&
             (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
              Name == "d2ll")) {
    Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
  } else if (Name == "rotate.b32") {
    Value *Arg = CI->getOperand(0);
    Value *ShiftAmt = CI->getOperand(1);
    Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
                                  {Arg, Arg, ShiftAmt});
  } else if (Name == "rotate.b64") {
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(0);
    Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "rotate.right.b64") {
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(0);
    Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if ((Name.consume_front("ptr.gen.to.") &&
              (Name.starts_with("local") || Name.starts_with("shared") ||
               Name.starts_with("global") || Name.starts_with("constant"))) ||
             (Name.consume_front("ptr.") &&
              (Name.consume_front("local") || Name.consume_front("shared") ||
               Name.consume_front("global") ||
               Name.consume_front("constant")) &&
              Name.starts_with(".to.gen"))) {
    Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
  } else if (Name.consume_front("ldg.global")) {
    Value *Ptr = CI->getArgOperand(0);
    Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
    // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
    Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
    Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
    MDNode *MD = MDNode::get(Builder.getContext(), {});
    LD->setMetadata(LLVMContext::MD_invariant_load, MD);
    return LD;
  } else {
    Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
    if (IID != Intrinsic::not_intrinsic &&
        !F->getReturnType()->getScalarType()->isBFloatTy()) {
      rename(F);
      Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
      SmallVector<Value *, 2> Args;
      for (size_t I = 0; I < NewFn->arg_size(); ++I) {
        Value *Arg = CI->getArgOperand(I);
        Type *OldType = Arg->getType();
        Type *NewType = NewFn->getArg(I)->getType();
        Args.push_back(
            (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
                ? Builder.CreateBitCast(Arg, NewType)
                : Arg);
      }
      Rep = Builder.CreateCall(NewFn, Args);
      if (F->getReturnType()->isIntegerTy())
        Rep = Builder.CreateBitCast(Rep, F->getReturnType());
    }
  }

  return Rep;
}
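// Upgrade legacy X86 intrinsic calls to generic IR or to the current X86
// intrinsics; the "llvm.x86." prefix has already been stripped from Name.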
2412 static Value
*upgradeX86IntrinsicCall(StringRef Name
, CallBase
*CI
, Function
*F
,
2413 IRBuilder
<> &Builder
) {
2414 LLVMContext
&C
= F
->getContext();
2415 Value
*Rep
= nullptr;
2417 if (Name
.starts_with("sse4a.movnt.")) {
2418 SmallVector
<Metadata
*, 1> Elts
;
2420 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C
), 1)));
2421 MDNode
*Node
= MDNode::get(C
, Elts
);
2423 Value
*Arg0
= CI
->getArgOperand(0);
2424 Value
*Arg1
= CI
->getArgOperand(1);
2426 // Nontemporal (unaligned) store of the 0'th element of the float/double
2429 Builder
.CreateExtractElement(Arg1
, (uint64_t)0, "extractelement");
2431 StoreInst
*SI
= Builder
.CreateAlignedStore(Extract
, Arg0
, Align(1));
2432 SI
->setMetadata(LLVMContext::MD_nontemporal
, Node
);
2433 } else if (Name
.starts_with("avx.movnt.") ||
2434 Name
.starts_with("avx512.storent.")) {
2435 SmallVector
<Metadata
*, 1> Elts
;
2437 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C
), 1)));
2438 MDNode
*Node
= MDNode::get(C
, Elts
);
2440 Value
*Arg0
= CI
->getArgOperand(0);
2441 Value
*Arg1
= CI
->getArgOperand(1);
2443 StoreInst
*SI
= Builder
.CreateAlignedStore(
2445 Align(Arg1
->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2446 SI
->setMetadata(LLVMContext::MD_nontemporal
, Node
);
2447 } else if (Name
== "sse2.storel.dq") {
2448 Value
*Arg0
= CI
->getArgOperand(0);
2449 Value
*Arg1
= CI
->getArgOperand(1);
2451 auto *NewVecTy
= FixedVectorType::get(Type::getInt64Ty(C
), 2);
2452 Value
*BC0
= Builder
.CreateBitCast(Arg1
, NewVecTy
, "cast");
2453 Value
*Elt
= Builder
.CreateExtractElement(BC0
, (uint64_t)0);
2454 Builder
.CreateAlignedStore(Elt
, Arg0
, Align(1));
2455 } else if (Name
.starts_with("sse.storeu.") ||
2456 Name
.starts_with("sse2.storeu.") ||
2457 Name
.starts_with("avx.storeu.")) {
2458 Value
*Arg0
= CI
->getArgOperand(0);
2459 Value
*Arg1
= CI
->getArgOperand(1);
2460 Builder
.CreateAlignedStore(Arg1
, Arg0
, Align(1));
2461 } else if (Name
== "avx512.mask.store.ss") {
2462 Value
*Mask
= Builder
.CreateAnd(CI
->getArgOperand(2), Builder
.getInt8(1));
2463 upgradeMaskedStore(Builder
, CI
->getArgOperand(0), CI
->getArgOperand(1),
2465 } else if (Name
.starts_with("avx512.mask.store")) {
2466 // "avx512.mask.storeu." or "avx512.mask.store."
2467 bool Aligned
= Name
[17] != 'u'; // "avx512.mask.storeu".
2468 upgradeMaskedStore(Builder
, CI
->getArgOperand(0), CI
->getArgOperand(1),
2469 CI
->getArgOperand(2), Aligned
);
2470 } else if (Name
.starts_with("sse2.pcmp") || Name
.starts_with("avx2.pcmp")) {
2471 // Upgrade packed integer vector compare intrinsics to compare instructions.
2472 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2473 bool CmpEq
= Name
[9] == 'e';
2474 Rep
= Builder
.CreateICmp(CmpEq
? ICmpInst::ICMP_EQ
: ICmpInst::ICMP_SGT
,
2475 CI
->getArgOperand(0), CI
->getArgOperand(1));
2476 Rep
= Builder
.CreateSExt(Rep
, CI
->getType(), "");
2477 } else if (Name
.starts_with("avx512.broadcastm")) {
2478 Type
*ExtTy
= Type::getInt32Ty(C
);
2479 if (CI
->getOperand(0)->getType()->isIntegerTy(8))
2480 ExtTy
= Type::getInt64Ty(C
);
2481 unsigned NumElts
= CI
->getType()->getPrimitiveSizeInBits() /
2482 ExtTy
->getPrimitiveSizeInBits();
2483 Rep
= Builder
.CreateZExt(CI
->getArgOperand(0), ExtTy
);
2484 Rep
= Builder
.CreateVectorSplat(NumElts
, Rep
);
2485 } else if (Name
== "sse.sqrt.ss" || Name
== "sse2.sqrt.sd") {
2486 Value
*Vec
= CI
->getArgOperand(0);
2487 Value
*Elt0
= Builder
.CreateExtractElement(Vec
, (uint64_t)0);
2488 Elt0
= Builder
.CreateIntrinsic(Intrinsic::sqrt
, Elt0
->getType(), Elt0
);
2489 Rep
= Builder
.CreateInsertElement(Vec
, Elt0
, (uint64_t)0);
2490 } else if (Name
.starts_with("avx.sqrt.p") ||
2491 Name
.starts_with("sse2.sqrt.p") ||
2492 Name
.starts_with("sse.sqrt.p")) {
2493 Rep
= Builder
.CreateIntrinsic(Intrinsic::sqrt
, CI
->getType(),
2494 {CI
->getArgOperand(0)});
2495 } else if (Name
.starts_with("avx512.mask.sqrt.p")) {
2496 if (CI
->arg_size() == 4 &&
2497 (!isa
<ConstantInt
>(CI
->getArgOperand(3)) ||
2498 cast
<ConstantInt
>(CI
->getArgOperand(3))->getZExtValue() != 4)) {
2499 Intrinsic::ID IID
= Name
[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2500 : Intrinsic::x86_avx512_sqrt_pd_512
;
2502 Value
*Args
[] = {CI
->getArgOperand(0), CI
->getArgOperand(3)};
2503 Rep
= Builder
.CreateIntrinsic(IID
, {}, Args
);
2505 Rep
= Builder
.CreateIntrinsic(Intrinsic::sqrt
, CI
->getType(),
2506 {CI
->getArgOperand(0)});
2509 emitX86Select(Builder
, CI
->getArgOperand(2), Rep
, CI
->getArgOperand(1));
2510 } else if (Name
.starts_with("avx512.ptestm") ||
2511 Name
.starts_with("avx512.ptestnm")) {
2512 Value
*Op0
= CI
->getArgOperand(0);
2513 Value
*Op1
= CI
->getArgOperand(1);
2514 Value
*Mask
= CI
->getArgOperand(2);
2515 Rep
= Builder
.CreateAnd(Op0
, Op1
);
2516 llvm::Type
*Ty
= Op0
->getType();
2517 Value
*Zero
= llvm::Constant::getNullValue(Ty
);
2518 ICmpInst::Predicate Pred
= Name
.starts_with("avx512.ptestm")
2520 : ICmpInst::ICMP_EQ
;
2521 Rep
= Builder
.CreateICmp(Pred
, Rep
, Zero
);
2522 Rep
= applyX86MaskOn1BitsVec(Builder
, Rep
, Mask
);
2523 } else if (Name
.starts_with("avx512.mask.pbroadcast")) {
2524 unsigned NumElts
= cast
<FixedVectorType
>(CI
->getArgOperand(1)->getType())
2526 Rep
= Builder
.CreateVectorSplat(NumElts
, CI
->getArgOperand(0));
2528 emitX86Select(Builder
, CI
->getArgOperand(2), Rep
, CI
->getArgOperand(1));
2529 } else if (Name
.starts_with("avx512.kunpck")) {
2530 unsigned NumElts
= CI
->getType()->getScalarSizeInBits();
2531 Value
*LHS
= getX86MaskVec(Builder
, CI
->getArgOperand(0), NumElts
);
2532 Value
*RHS
= getX86MaskVec(Builder
, CI
->getArgOperand(1), NumElts
);
2534 for (unsigned i
= 0; i
!= NumElts
; ++i
)
2537 // First extract half of each vector. This gives better codegen than
2538 // doing it in a single shuffle.
2539 LHS
= Builder
.CreateShuffleVector(LHS
, LHS
, ArrayRef(Indices
, NumElts
/ 2));
2540 RHS
= Builder
.CreateShuffleVector(RHS
, RHS
, ArrayRef(Indices
, NumElts
/ 2));
2541 // Concat the vectors.
2542 // NOTE: Operands have to be swapped to match intrinsic definition.
2543 Rep
= Builder
.CreateShuffleVector(RHS
, LHS
, ArrayRef(Indices
, NumElts
));
2544 Rep
= Builder
.CreateBitCast(Rep
, CI
->getType());
2545 } else if (Name
== "avx512.kand.w") {
2546 Value
*LHS
= getX86MaskVec(Builder
, CI
->getArgOperand(0), 16);
2547 Value
*RHS
= getX86MaskVec(Builder
, CI
->getArgOperand(1), 16);
2548 Rep
= Builder
.CreateAnd(LHS
, RHS
);
2549 Rep
= Builder
.CreateBitCast(Rep
, CI
->getType());
2550 } else if (Name
== "avx512.kandn.w") {
2551 Value
*LHS
= getX86MaskVec(Builder
, CI
->getArgOperand(0), 16);
2552 Value
*RHS
= getX86MaskVec(Builder
, CI
->getArgOperand(1), 16);
2553 LHS
= Builder
.CreateNot(LHS
);
2554 Rep
= Builder
.CreateAnd(LHS
, RHS
);
2555 Rep
= Builder
.CreateBitCast(Rep
, CI
->getType());
2556 } else if (Name
== "avx512.kor.w") {
2557 Value
*LHS
= getX86MaskVec(Builder
, CI
->getArgOperand(0), 16);
2558 Value
*RHS
= getX86MaskVec(Builder
, CI
->getArgOperand(1), 16);
2559 Rep
= Builder
.CreateOr(LHS
, RHS
);
2560 Rep
= Builder
.CreateBitCast(Rep
, CI
->getType());
2561 } else if (Name
== "avx512.kxor.w") {
2562 Value
*LHS
= getX86MaskVec(Builder
, CI
->getArgOperand(0), 16);
2563 Value
*RHS
= getX86MaskVec(Builder
, CI
->getArgOperand(1), 16);
2564 Rep
= Builder
.CreateXor(LHS
, RHS
);
2565 Rep
= Builder
.CreateBitCast(Rep
, CI
->getType());
2566 } else if (Name
== "avx512.kxnor.w") {
2567 Value
*LHS
= getX86MaskVec(Builder
, CI
->getArgOperand(0), 16);
2568 Value
*RHS
= getX86MaskVec(Builder
, CI
->getArgOperand(1), 16);
2569 LHS
= Builder
.CreateNot(LHS
);
2570 Rep
= Builder
.CreateXor(LHS
, RHS
);
2571 Rep
= Builder
.CreateBitCast(Rep
, CI
->getType());
2572 } else if (Name
== "avx512.knot.w") {
2573 Rep
= getX86MaskVec(Builder
, CI
->getArgOperand(0), 16);
2574 Rep
= Builder
.CreateNot(Rep
);
2575 Rep
= Builder
.CreateBitCast(Rep
, CI
->getType());
2576 } else if (Name
== "avx512.kortestz.w" || Name
== "avx512.kortestc.w") {
2577 Value
*LHS
= getX86MaskVec(Builder
, CI
->getArgOperand(0), 16);
2578 Value
*RHS
= getX86MaskVec(Builder
, CI
->getArgOperand(1), 16);
2579 Rep
= Builder
.CreateOr(LHS
, RHS
);
2580 Rep
= Builder
.CreateBitCast(Rep
, Builder
.getInt16Ty());
2582 if (Name
[14] == 'c')
2583 C
= ConstantInt::getAllOnesValue(Builder
.getInt16Ty());
2585 C
= ConstantInt::getNullValue(Builder
.getInt16Ty());
2586 Rep
= Builder
.CreateICmpEQ(Rep
, C
);
2587 Rep
= Builder
.CreateZExt(Rep
, Builder
.getInt32Ty());
2588 } else if (Name
== "sse.add.ss" || Name
== "sse2.add.sd" ||
2589 Name
== "sse.sub.ss" || Name
== "sse2.sub.sd" ||
2590 Name
== "sse.mul.ss" || Name
== "sse2.mul.sd" ||
2591 Name
== "sse.div.ss" || Name
== "sse2.div.sd") {
2592 Type
*I32Ty
= Type::getInt32Ty(C
);
2593 Value
*Elt0
= Builder
.CreateExtractElement(CI
->getArgOperand(0),
2594 ConstantInt::get(I32Ty
, 0));
2595 Value
*Elt1
= Builder
.CreateExtractElement(CI
->getArgOperand(1),
2596 ConstantInt::get(I32Ty
, 0));
2598 if (Name
.contains(".add."))
2599 EltOp
= Builder
.CreateFAdd(Elt0
, Elt1
);
2600 else if (Name
.contains(".sub."))
2601 EltOp
= Builder
.CreateFSub(Elt0
, Elt1
);
2602 else if (Name
.contains(".mul."))
2603 EltOp
= Builder
.CreateFMul(Elt0
, Elt1
);
2605 EltOp
= Builder
.CreateFDiv(Elt0
, Elt1
);
2606 Rep
= Builder
.CreateInsertElement(CI
->getArgOperand(0), EltOp
,
2607 ConstantInt::get(I32Ty
, 0));
2608 } else if (Name
.starts_with("avx512.mask.pcmp")) {
2609 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2610 bool CmpEq
= Name
[16] == 'e';
2611 Rep
= upgradeMaskedCompare(Builder
, *CI
, CmpEq
? 0 : 6, true);
2612 } else if (Name
.starts_with("avx512.mask.vpshufbitqmb.")) {
2613 Type
*OpTy
= CI
->getArgOperand(0)->getType();
2614 unsigned VecWidth
= OpTy
->getPrimitiveSizeInBits();
2618 llvm_unreachable("Unexpected intrinsic");
2620 IID
= Intrinsic::x86_avx512_vpshufbitqmb_128
;
2623 IID
= Intrinsic::x86_avx512_vpshufbitqmb_256
;
2626 IID
= Intrinsic::x86_avx512_vpshufbitqmb_512
;
2630 Rep
= Builder
.CreateIntrinsic(IID
, {},
2631 {CI
->getOperand(0), CI
->getArgOperand(1)});
2632 Rep
= applyX86MaskOn1BitsVec(Builder
, Rep
, CI
->getArgOperand(2));
2633 } else if (Name
.starts_with("avx512.mask.fpclass.p")) {
2634 Type
*OpTy
= CI
->getArgOperand(0)->getType();
2635 unsigned VecWidth
= OpTy
->getPrimitiveSizeInBits();
2636 unsigned EltWidth
= OpTy
->getScalarSizeInBits();
2638 if (VecWidth
== 128 && EltWidth
== 32)
2639 IID
= Intrinsic::x86_avx512_fpclass_ps_128
;
2640 else if (VecWidth
== 256 && EltWidth
== 32)
2641 IID
= Intrinsic::x86_avx512_fpclass_ps_256
;
2642 else if (VecWidth
== 512 && EltWidth
== 32)
2643 IID
= Intrinsic::x86_avx512_fpclass_ps_512
;
2644 else if (VecWidth
== 128 && EltWidth
== 64)
2645 IID
= Intrinsic::x86_avx512_fpclass_pd_128
;
2646 else if (VecWidth
== 256 && EltWidth
== 64)
2647 IID
= Intrinsic::x86_avx512_fpclass_pd_256
;
2648 else if (VecWidth
== 512 && EltWidth
== 64)
2649 IID
= Intrinsic::x86_avx512_fpclass_pd_512
;
2651 llvm_unreachable("Unexpected intrinsic");
2653 Rep
= Builder
.CreateIntrinsic(IID
, {},
2654 {CI
->getOperand(0), CI
->getArgOperand(1)});
2655 Rep
= applyX86MaskOn1BitsVec(Builder
, Rep
, CI
->getArgOperand(2));
2656 } else if (Name
.starts_with("avx512.cmp.p")) {
2657 SmallVector
<Value
*, 4> Args(CI
->args());
2658 Type
*OpTy
= Args
[0]->getType();
2659 unsigned VecWidth
= OpTy
->getPrimitiveSizeInBits();
2660 unsigned EltWidth
= OpTy
->getScalarSizeInBits();
2662 if (VecWidth
== 128 && EltWidth
== 32)
2663 IID
= Intrinsic::x86_avx512_mask_cmp_ps_128
;
2664 else if (VecWidth
== 256 && EltWidth
== 32)
2665 IID
= Intrinsic::x86_avx512_mask_cmp_ps_256
;
2666 else if (VecWidth
== 512 && EltWidth
== 32)
2667 IID
= Intrinsic::x86_avx512_mask_cmp_ps_512
;
2668 else if (VecWidth
== 128 && EltWidth
== 64)
2669 IID
= Intrinsic::x86_avx512_mask_cmp_pd_128
;
2670 else if (VecWidth
== 256 && EltWidth
== 64)
2671 IID
= Intrinsic::x86_avx512_mask_cmp_pd_256
;
2672 else if (VecWidth
== 512 && EltWidth
== 64)
2673 IID
= Intrinsic::x86_avx512_mask_cmp_pd_512
;
2675 llvm_unreachable("Unexpected intrinsic");
2677 Value
*Mask
= Constant::getAllOnesValue(CI
->getType());
2678 if (VecWidth
== 512)
2679 std::swap(Mask
, Args
.back());
2680 Args
.push_back(Mask
);
2682 Rep
= Builder
.CreateIntrinsic(IID
, {}, Args
);
2683 } else if (Name
.starts_with("avx512.mask.cmp.")) {
2684 // Integer compare intrinsics.
2685 unsigned Imm
= cast
<ConstantInt
>(CI
->getArgOperand(2))->getZExtValue();
2686 Rep
= upgradeMaskedCompare(Builder
, *CI
, Imm
, true);
2687 } else if (Name
.starts_with("avx512.mask.ucmp.")) {
2688 unsigned Imm
= cast
<ConstantInt
>(CI
->getArgOperand(2))->getZExtValue();
2689 Rep
= upgradeMaskedCompare(Builder
, *CI
, Imm
, false);
2690 } else if (Name
.starts_with("avx512.cvtb2mask.") ||
2691 Name
.starts_with("avx512.cvtw2mask.") ||
2692 Name
.starts_with("avx512.cvtd2mask.") ||
2693 Name
.starts_with("avx512.cvtq2mask.")) {
2694 Value
*Op
= CI
->getArgOperand(0);
2695 Value
*Zero
= llvm::Constant::getNullValue(Op
->getType());
2696 Rep
= Builder
.CreateICmp(ICmpInst::ICMP_SLT
, Op
, Zero
);
2697 Rep
= applyX86MaskOn1BitsVec(Builder
, Rep
, nullptr);
2698 } else if (Name
== "ssse3.pabs.b.128" || Name
== "ssse3.pabs.w.128" ||
2699 Name
== "ssse3.pabs.d.128" || Name
.starts_with("avx2.pabs") ||
2700 Name
.starts_with("avx512.mask.pabs")) {
2701 Rep
= upgradeAbs(Builder
, *CI
);
2702 } else if (Name
== "sse41.pmaxsb" || Name
== "sse2.pmaxs.w" ||
2703 Name
== "sse41.pmaxsd" || Name
.starts_with("avx2.pmaxs") ||
2704 Name
.starts_with("avx512.mask.pmaxs")) {
2705 Rep
= upgradeX86BinaryIntrinsics(Builder
, *CI
, Intrinsic::smax
);
2706 } else if (Name
== "sse2.pmaxu.b" || Name
== "sse41.pmaxuw" ||
2707 Name
== "sse41.pmaxud" || Name
.starts_with("avx2.pmaxu") ||
2708 Name
.starts_with("avx512.mask.pmaxu")) {
2709 Rep
= upgradeX86BinaryIntrinsics(Builder
, *CI
, Intrinsic::umax
);
2710 } else if (Name
== "sse41.pminsb" || Name
== "sse2.pmins.w" ||
2711 Name
== "sse41.pminsd" || Name
.starts_with("avx2.pmins") ||
2712 Name
.starts_with("avx512.mask.pmins")) {
2713 Rep
= upgradeX86BinaryIntrinsics(Builder
, *CI
, Intrinsic::smin
);
2714 } else if (Name
== "sse2.pminu.b" || Name
== "sse41.pminuw" ||
2715 Name
== "sse41.pminud" || Name
.starts_with("avx2.pminu") ||
2716 Name
.starts_with("avx512.mask.pminu")) {
2717 Rep
= upgradeX86BinaryIntrinsics(Builder
, *CI
, Intrinsic::umin
);
2718 } else if (Name
== "sse2.pmulu.dq" || Name
== "avx2.pmulu.dq" ||
2719 Name
== "avx512.pmulu.dq.512" ||
2720 Name
.starts_with("avx512.mask.pmulu.dq.")) {
2721 Rep
= upgradePMULDQ(Builder
, *CI
, /*Signed*/ false);
2722 } else if (Name
== "sse41.pmuldq" || Name
== "avx2.pmul.dq" ||
2723 Name
== "avx512.pmul.dq.512" ||
2724 Name
.starts_with("avx512.mask.pmul.dq.")) {
2725 Rep
= upgradePMULDQ(Builder
, *CI
, /*Signed*/ true);
2726 } else if (Name
== "sse.cvtsi2ss" || Name
== "sse2.cvtsi2sd" ||
2727 Name
== "sse.cvtsi642ss" || Name
== "sse2.cvtsi642sd") {
2729 Builder
.CreateSIToFP(CI
->getArgOperand(1),
2730 cast
<VectorType
>(CI
->getType())->getElementType());
2731 Rep
= Builder
.CreateInsertElement(CI
->getArgOperand(0), Rep
, (uint64_t)0);
2732 } else if (Name
== "avx512.cvtusi2sd") {
2734 Builder
.CreateUIToFP(CI
->getArgOperand(1),
2735 cast
<VectorType
>(CI
->getType())->getElementType());
2736 Rep
= Builder
.CreateInsertElement(CI
->getArgOperand(0), Rep
, (uint64_t)0);
2737 } else if (Name
== "sse2.cvtss2sd") {
2738 Rep
= Builder
.CreateExtractElement(CI
->getArgOperand(1), (uint64_t)0);
2739 Rep
= Builder
.CreateFPExt(
2740 Rep
, cast
<VectorType
>(CI
->getType())->getElementType());
2741 Rep
= Builder
.CreateInsertElement(CI
->getArgOperand(0), Rep
, (uint64_t)0);
2742 } else if (Name
== "sse2.cvtdq2pd" || Name
== "sse2.cvtdq2ps" ||
2743 Name
== "avx.cvtdq2.pd.256" || Name
== "avx.cvtdq2.ps.256" ||
2744 Name
.starts_with("avx512.mask.cvtdq2pd.") ||
2745 Name
.starts_with("avx512.mask.cvtudq2pd.") ||
2746 Name
.starts_with("avx512.mask.cvtdq2ps.") ||
2747 Name
.starts_with("avx512.mask.cvtudq2ps.") ||
2748 Name
.starts_with("avx512.mask.cvtqq2pd.") ||
2749 Name
.starts_with("avx512.mask.cvtuqq2pd.") ||
2750 Name
== "avx512.mask.cvtqq2ps.256" ||
2751 Name
== "avx512.mask.cvtqq2ps.512" ||
2752 Name
== "avx512.mask.cvtuqq2ps.256" ||
2753 Name
== "avx512.mask.cvtuqq2ps.512" || Name
== "sse2.cvtps2pd" ||
2754 Name
== "avx.cvt.ps2.pd.256" ||
2755 Name
== "avx512.mask.cvtps2pd.128" ||
2756 Name
== "avx512.mask.cvtps2pd.256") {
2757 auto *DstTy
= cast
<FixedVectorType
>(CI
->getType());
2758 Rep
= CI
->getArgOperand(0);
2759 auto *SrcTy
= cast
<FixedVectorType
>(Rep
->getType());
2761 unsigned NumDstElts
= DstTy
->getNumElements();
2762 if (NumDstElts
< SrcTy
->getNumElements()) {
2763 assert(NumDstElts
== 2 && "Unexpected vector size");
2764 Rep
= Builder
.CreateShuffleVector(Rep
, Rep
, ArrayRef
<int>{0, 1});
2767 bool IsPS2PD
= SrcTy
->getElementType()->isFloatTy();
2768 bool IsUnsigned
= Name
.contains("cvtu");
2770 Rep
= Builder
.CreateFPExt(Rep
, DstTy
, "cvtps2pd");
2771 else if (CI
->arg_size() == 4 &&
2772 (!isa
<ConstantInt
>(CI
->getArgOperand(3)) ||
2773 cast
<ConstantInt
>(CI
->getArgOperand(3))->getZExtValue() != 4)) {
2774 Intrinsic::ID IID
= IsUnsigned
? Intrinsic::x86_avx512_uitofp_round
2775 : Intrinsic::x86_avx512_sitofp_round
;
2776 Rep
= Builder
.CreateIntrinsic(IID
, {DstTy
, SrcTy
},
2777 {Rep
, CI
->getArgOperand(3)});
2779 Rep
= IsUnsigned
? Builder
.CreateUIToFP(Rep
, DstTy
, "cvt")
2780 : Builder
.CreateSIToFP(Rep
, DstTy
, "cvt");
2783 if (CI
->arg_size() >= 3)
2784 Rep
= emitX86Select(Builder
, CI
->getArgOperand(2), Rep
,
2785 CI
->getArgOperand(1));
2786 } else if (Name
.starts_with("avx512.mask.vcvtph2ps.") ||
2787 Name
.starts_with("vcvtph2ps.")) {
2788 auto *DstTy
= cast
<FixedVectorType
>(CI
->getType());
2789 Rep
= CI
->getArgOperand(0);
2790 auto *SrcTy
= cast
<FixedVectorType
>(Rep
->getType());
2791 unsigned NumDstElts
= DstTy
->getNumElements();
2792 if (NumDstElts
!= SrcTy
->getNumElements()) {
2793 assert(NumDstElts
== 4 && "Unexpected vector size");
2794 Rep
= Builder
.CreateShuffleVector(Rep
, Rep
, ArrayRef
<int>{0, 1, 2, 3});
2796 Rep
= Builder
.CreateBitCast(
2797 Rep
, FixedVectorType::get(Type::getHalfTy(C
), NumDstElts
));
2798 Rep
= Builder
.CreateFPExt(Rep
, DstTy
, "cvtph2ps");
2799 if (CI
->arg_size() >= 3)
2800 Rep
= emitX86Select(Builder
, CI
->getArgOperand(2), Rep
,
2801 CI
->getArgOperand(1));
2802 } else if (Name
.starts_with("avx512.mask.load")) {
2803 // "avx512.mask.loadu." or "avx512.mask.load."
2804 bool Aligned
= Name
[16] != 'u'; // "avx512.mask.loadu".
2805 Rep
= upgradeMaskedLoad(Builder
, CI
->getArgOperand(0), CI
->getArgOperand(1),
2806 CI
->getArgOperand(2), Aligned
);
2807 } else if (Name
.starts_with("avx512.mask.expand.load.")) {
2808 auto *ResultTy
= cast
<FixedVectorType
>(CI
->getType());
2809 Value
*MaskVec
= getX86MaskVec(Builder
, CI
->getArgOperand(2),
2810 ResultTy
->getNumElements());
2812 Rep
= Builder
.CreateIntrinsic(
2813 Intrinsic::masked_expandload
, ResultTy
,
2814 {CI
->getOperand(0), MaskVec
, CI
->getOperand(1)});
2815 } else if (Name
.starts_with("avx512.mask.compress.store.")) {
2816 auto *ResultTy
= cast
<VectorType
>(CI
->getArgOperand(1)->getType());
2818 getX86MaskVec(Builder
, CI
->getArgOperand(2),
2819 cast
<FixedVectorType
>(ResultTy
)->getNumElements());
2821 Rep
= Builder
.CreateIntrinsic(
2822 Intrinsic::masked_compressstore
, ResultTy
,
2823 {CI
->getArgOperand(1), CI
->getArgOperand(0), MaskVec
});
2824 } else if (Name
.starts_with("avx512.mask.compress.") ||
2825 Name
.starts_with("avx512.mask.expand.")) {
2826 auto *ResultTy
= cast
<FixedVectorType
>(CI
->getType());
2828 Value
*MaskVec
= getX86MaskVec(Builder
, CI
->getArgOperand(2),
2829 ResultTy
->getNumElements());
2831 bool IsCompress
= Name
[12] == 'c';
2832 Intrinsic::ID IID
= IsCompress
? Intrinsic::x86_avx512_mask_compress
2833 : Intrinsic::x86_avx512_mask_expand
;
2834 Rep
= Builder
.CreateIntrinsic(
2835 IID
, ResultTy
, {CI
->getOperand(0), CI
->getOperand(1), MaskVec
});
2836 } else if (Name
.starts_with("xop.vpcom")) {
2838 if (Name
.ends_with("ub") || Name
.ends_with("uw") || Name
.ends_with("ud") ||
2839 Name
.ends_with("uq"))
2841 else if (Name
.ends_with("b") || Name
.ends_with("w") ||
2842 Name
.ends_with("d") || Name
.ends_with("q"))
2845 llvm_unreachable("Unknown suffix");
2848 if (CI
->arg_size() == 3) {
2849 Imm
= cast
<ConstantInt
>(CI
->getArgOperand(2))->getZExtValue();
2851 Name
= Name
.substr(9); // strip off "xop.vpcom"
2852 if (Name
.starts_with("lt"))
2854 else if (Name
.starts_with("le"))
2856 else if (Name
.starts_with("gt"))
2858 else if (Name
.starts_with("ge"))
2860 else if (Name
.starts_with("eq"))
2862 else if (Name
.starts_with("ne"))
2864 else if (Name
.starts_with("false"))
2866 else if (Name
.starts_with("true"))
2869 llvm_unreachable("Unknown condition");
2872 Rep
= upgradeX86vpcom(Builder
, *CI
, Imm
, IsSigned
);
2873 } else if (Name
.starts_with("xop.vpcmov")) {
2874 Value
*Sel
= CI
->getArgOperand(2);
2875 Value
*NotSel
= Builder
.CreateNot(Sel
);
2876 Value
*Sel0
= Builder
.CreateAnd(CI
->getArgOperand(0), Sel
);
2877 Value
*Sel1
= Builder
.CreateAnd(CI
->getArgOperand(1), NotSel
);
2878 Rep
= Builder
.CreateOr(Sel0
, Sel1
);
2879 } else if (Name
.starts_with("xop.vprot") || Name
.starts_with("avx512.prol") ||
2880 Name
.starts_with("avx512.mask.prol")) {
2881 Rep
= upgradeX86Rotate(Builder
, *CI
, false);
2882 } else if (Name
.starts_with("avx512.pror") ||
2883 Name
.starts_with("avx512.mask.pror")) {
2884 Rep
= upgradeX86Rotate(Builder
, *CI
, true);
2885 } else if (Name
.starts_with("avx512.vpshld.") ||
2886 Name
.starts_with("avx512.mask.vpshld") ||
2887 Name
.starts_with("avx512.maskz.vpshld")) {
2888 bool ZeroMask
= Name
[11] == 'z';
2889 Rep
= upgradeX86ConcatShift(Builder
, *CI
, false, ZeroMask
);
2890 } else if (Name
.starts_with("avx512.vpshrd.") ||
2891 Name
.starts_with("avx512.mask.vpshrd") ||
2892 Name
.starts_with("avx512.maskz.vpshrd")) {
2893 bool ZeroMask
= Name
[11] == 'z';
2894 Rep
= upgradeX86ConcatShift(Builder
, *CI
, true, ZeroMask
);
2895 } else if (Name
== "sse42.crc32.64.8") {
2897 Builder
.CreateTrunc(CI
->getArgOperand(0), Type::getInt32Ty(C
));
2898 Rep
= Builder
.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8
, {},
2899 {Trunc0
, CI
->getArgOperand(1)});
2900 Rep
= Builder
.CreateZExt(Rep
, CI
->getType(), "");
2901 } else if (Name
.starts_with("avx.vbroadcast.s") ||
2902 Name
.starts_with("avx512.vbroadcast.s")) {
2903 // Replace broadcasts with a series of insertelements.
2904 auto *VecTy
= cast
<FixedVectorType
>(CI
->getType());
2905 Type
*EltTy
= VecTy
->getElementType();
2906 unsigned EltNum
= VecTy
->getNumElements();
2907 Value
*Load
= Builder
.CreateLoad(EltTy
, CI
->getArgOperand(0));
2908 Type
*I32Ty
= Type::getInt32Ty(C
);
2909 Rep
= PoisonValue::get(VecTy
);
2910 for (unsigned I
= 0; I
< EltNum
; ++I
)
2911 Rep
= Builder
.CreateInsertElement(Rep
, Load
, ConstantInt::get(I32Ty
, I
));
  } else if (Name.starts_with("sse41.pmovsx") ||
             Name.starts_with("sse41.pmovzx") ||
             Name.starts_with("avx2.pmovsx") ||
             Name.starts_with("avx2.pmovzx") ||
             Name.starts_with("avx512.mask.pmovsx") ||
             Name.starts_with("avx512.mask.pmovzx")) {
    auto *DstTy = cast<FixedVectorType>(CI->getType());
    unsigned NumDstElts = DstTy->getNumElements();

    // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
    SmallVector<int, 8> ShuffleMask(NumDstElts);
    for (unsigned i = 0; i != NumDstElts; ++i)
      ShuffleMask[i] = i;

    Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);

    bool DoSext = Name.contains("pmovsx");
    Rep =
        DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
    // If there are 3 arguments, it's a masked intrinsic so we need a select.
    if (CI->arg_size() == 3)
      Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
  } else if (Name == "avx512.mask.pmov.qd.256" ||
             Name == "avx512.mask.pmov.qd.512" ||
             Name == "avx512.mask.pmov.wb.256" ||
             Name == "avx512.mask.pmov.wb.512") {
    Type *Ty = CI->getArgOperand(1)->getType();
    Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
    Rep =
        emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
  } else if (Name.starts_with("avx.vbroadcastf128") ||
             Name == "avx2.vbroadcasti128") {
    // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
    Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
    unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
    auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
    Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
    if (NumSrcElts == 2)
      Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
    else
      Rep = Builder.CreateShuffleVector(Load,
                                        ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
  } else if (Name.starts_with("avx512.mask.shuf.i") ||
             Name.starts_with("avx512.mask.shuf.f")) {
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    Type *VT = CI->getType();
    unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
    unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
    unsigned ControlBitsMask = NumLanes - 1;
    unsigned NumControlBits = NumLanes / 2;
    SmallVector<int, 8> ShuffleMask(0);

    for (unsigned l = 0; l != NumLanes; ++l) {
      unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
      // We actually need the other source.
      if (l >= NumLanes / 2)
        LaneMask += NumLanes;
      for (unsigned i = 0; i != NumElementsInLane; ++i)
        ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
    }
    Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                      CI->getArgOperand(1), ShuffleMask);
    Rep =
        emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
  } else if (Name.starts_with("avx512.mask.broadcastf") ||
             Name.starts_with("avx512.mask.broadcasti")) {
    unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
                              ->getNumElements();
    unsigned NumDstElts =
        cast<FixedVectorType>(CI->getType())->getNumElements();

    SmallVector<int, 8> ShuffleMask(NumDstElts);
    for (unsigned i = 0; i != NumDstElts; ++i)
      ShuffleMask[i] = i % NumSrcElts;

    Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                      CI->getArgOperand(0), ShuffleMask);
    Rep =
        emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
  } else if (Name.starts_with("avx2.pbroadcast") ||
             Name.starts_with("avx2.vbroadcast") ||
             Name.starts_with("avx512.pbroadcast") ||
             Name.starts_with("avx512.mask.broadcast.s")) {
    // Replace vp?broadcasts with a vector shuffle.
    Value *Op = CI->getArgOperand(0);
    ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
    Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
    SmallVector<int, 8> M;
    ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
    Rep = Builder.CreateShuffleVector(Op, M);

    if (CI->arg_size() == 3)
      Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
  } else if (Name.starts_with("sse2.padds.") ||
             Name.starts_with("avx2.padds.") ||
             Name.starts_with("avx512.padds.") ||
             Name.starts_with("avx512.mask.padds.")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
  } else if (Name.starts_with("sse2.psubs.") ||
             Name.starts_with("avx2.psubs.") ||
             Name.starts_with("avx512.psubs.") ||
             Name.starts_with("avx512.mask.psubs.")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
  } else if (Name.starts_with("sse2.paddus.") ||
             Name.starts_with("avx2.paddus.") ||
             Name.starts_with("avx512.mask.paddus.")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
  } else if (Name.starts_with("sse2.psubus.") ||
             Name.starts_with("avx2.psubus.") ||
             Name.starts_with("avx512.mask.psubus.")) {
    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
  } else if (Name.starts_with("avx512.mask.palignr.")) {
    Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
                                    CI->getArgOperand(1), CI->getArgOperand(2),
                                    CI->getArgOperand(3), CI->getArgOperand(4),
                                    false);
  } else if (Name.starts_with("avx512.mask.valign.")) {
    Rep = upgradeX86ALIGNIntrinsics(
        Builder, CI->getArgOperand(0), CI->getArgOperand(1),
        CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
  } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
    // 128/256-bit shift left specified in bits.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
                                     Shift / 8); // Shift is in bits.
  } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
    // 128/256-bit shift right specified in bits.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
                                     Shift / 8); // Shift is in bits.
  } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
             Name == "avx512.psll.dq.512") {
    // 128/256/512-bit shift left specified in bytes.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
  } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
             Name == "avx512.psrl.dq.512") {
    // 128/256/512-bit shift right specified in bytes.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
  } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
             Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
             Name.starts_with("avx2.pblendd.")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    auto *VecTy = cast<FixedVectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
  } else if (Name.starts_with("avx.vinsertf128.") ||
             Name == "avx2.vinserti128" ||
             Name.starts_with("avx512.mask.insert")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    unsigned DstNumElts =
        cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned SrcNumElts =
        cast<FixedVectorType>(Op1->getType())->getNumElements();
    unsigned Scale = DstNumElts / SrcNumElts;

    // Mask off the high bits of the immediate value; hardware ignores those.
    Imm = Imm % Scale;

    // Extend the second operand into a vector the size of the destination.
    SmallVector<int, 8> Idxs(DstNumElts);
    for (unsigned i = 0; i != SrcNumElts; ++i)
      Idxs[i] = i;
    for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
      Idxs[i] = SrcNumElts;
    Rep = Builder.CreateShuffleVector(Op1, Idxs);

    // Insert the second operand into the first operand.

    // Note that there is no guarantee that instruction lowering will actually
    // produce a vinsertf128 instruction for the created shuffles. In
    // particular, the 0 immediate case involves no lane changes, so it can
    // be handled as a blend.

    // Example of shuffle mask for 32-bit elements:
    // Imm = 1  <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
    // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >

    // First fill with identity mask.
    for (unsigned i = 0; i != DstNumElts; ++i)
      Idxs[i] = i;
    // Then replace the elements where we need to insert.
    for (unsigned i = 0; i != SrcNumElts; ++i)
      Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
    Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);

    // If the intrinsic has a mask operand, handle that.
    if (CI->arg_size() == 5)
      Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
  } else if (Name.starts_with("avx.vextractf128.") ||
             Name == "avx2.vextracti128" ||
             Name.starts_with("avx512.mask.vextract")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    unsigned DstNumElts =
        cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned SrcNumElts =
        cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned Scale = SrcNumElts / DstNumElts;

    // Mask off the high bits of the immediate value; hardware ignores those.
    Imm = Imm % Scale;

    // Get indexes for the subvector of the input vector.
    SmallVector<int, 8> Idxs(DstNumElts);
    for (unsigned i = 0; i != DstNumElts; ++i) {
      Idxs[i] = i + (Imm * DstNumElts);
    }
    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

    // If the intrinsic has a mask operand, handle that.
    if (CI->arg_size() == 4)
      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.perm.df.") ||
             Name.starts_with("avx512.mask.perm.di.")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    auto *VecTy = cast<FixedVectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();

    SmallVector<int, 8> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

    if (CI->arg_size() == 4)
      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
  } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
    // The immediate permute control byte looks like this:
    // [1:0] - select 128 bits from sources for low half of destination
    // [2]   - ignore
    // [3]   - zero low half of destination
    // [5:4] - select 128 bits from sources for high half of destination
    // [6]   - ignore
    // [7]   - zero high half of destination

    uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();

    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned HalfSize = NumElts / 2;
    SmallVector<int, 8> ShuffleMask(NumElts);

    // Determine which operand(s) are actually in use for this instruction.
    Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
    Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);

    // If needed, replace operands based on zero mask.
    V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
    V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;

    // Permute low half of result.
    unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
    for (unsigned i = 0; i < HalfSize; ++i)
      ShuffleMask[i] = StartIndex + i;

    // Permute high half of result.
    StartIndex = (Imm & 0x10) ? HalfSize : 0;
    for (unsigned i = 0; i < HalfSize; ++i)
      ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;

    Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
  } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
             Name.starts_with("avx512.mask.vpermil.p") ||
             Name.starts_with("avx512.mask.pshuf.d.")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    auto *VecTy = cast<FixedVectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();
    // Calculate the size of each index in the immediate.
    unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
    unsigned IdxMask = ((1 << IdxSize) - 1);

    SmallVector<int, 8> Idxs(NumElts);
    // Lookup the bits for this element, wrapping around the immediate every
    // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
    // to offset by the first index of each group.
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

    if (CI->arg_size() == 4)
      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
  } else if (Name == "sse2.pshufl.w" ||
             Name.starts_with("avx512.mask.pshufl.w.")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
      for (unsigned i = 4; i != 8; ++i)
        Idxs[i + l] = i + l;
    }

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

    if (CI->arg_size() == 4)
      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
  } else if (Name == "sse2.pshufh.w" ||
             Name.starts_with("avx512.mask.pshufh.w.")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l] = i + l;
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
    }

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

    if (CI->arg_size() == 4)
      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.shuf.p")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

    unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
    unsigned HalfLaneElts = NumLaneElts / 2;

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i) {
      // Base index is the starting element of the lane.
      Idxs[i] = i - (i % NumLaneElts);
      // If we are half way through the lane switch to the other source.
      if ((i % NumLaneElts) >= HalfLaneElts)
        Idxs[i] += NumElts;
      // Now select the specific element by adding HalfLaneElts bits from the
      // immediate, wrapping around the immediate every 8 bits.
      Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
    }

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

    Rep =
        emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
  } else if (Name.starts_with("avx512.mask.movddup") ||
             Name.starts_with("avx512.mask.movshdup") ||
             Name.starts_with("avx512.mask.movsldup")) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();

    unsigned Offset = 0;
    if (Name.starts_with("avx512.mask.movshdup."))
      Offset = 1;

    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned l = 0; l != NumElts; l += NumLaneElts)
      for (unsigned i = 0; i != NumLaneElts; i += 2) {
        Idxs[i + l + 0] = i + l + Offset;
        Idxs[i + l + 1] = i + l + Offset;
      }

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

    Rep =
        emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
  } else if (Name.starts_with("avx512.mask.punpckl") ||
             Name.starts_with("avx512.mask.unpckl.")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();

    SmallVector<int, 64> Idxs(NumElts);
    for (int l = 0; l != NumElts; l += NumLaneElts)
      for (int i = 0; i != NumLaneElts; ++i)
        Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

    Rep =
        emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.punpckh") ||
             Name.starts_with("avx512.mask.unpckh.")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();

    SmallVector<int, 64> Idxs(NumElts);
    for (int l = 0; l != NumElts; l += NumLaneElts)
      for (int i = 0; i != NumLaneElts; ++i)
        Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

    Rep =
        emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.and.") ||
             Name.starts_with("avx512.mask.pand.")) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    Rep =
        emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.andn.") ||
             Name.starts_with("avx512.mask.pandn.")) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
    Rep = Builder.CreateAnd(Rep,
                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    Rep =
        emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.or.") ||
             Name.starts_with("avx512.mask.por.")) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                           Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    Rep =
        emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.xor.") ||
             Name.starts_with("avx512.mask.pxor.")) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    Rep =
        emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.padd.")) {
    Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
    Rep =
        emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.psub.")) {
    Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
    Rep =
        emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.pmull.")) {
    Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
    Rep =
        emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.add.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_add_ps_512;
      else
        IID = Intrinsic::x86_avx512_add_pd_512;

      Rep = Builder.CreateIntrinsic(
          IID, {},
          {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
    }
    Rep =
        emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.div.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_div_ps_512;
      else
        IID = Intrinsic::x86_avx512_div_pd_512;

      Rep = Builder.CreateIntrinsic(
          IID, {},
          {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
    }
    Rep =
        emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.mul.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_mul_ps_512;
      else
        IID = Intrinsic::x86_avx512_mul_pd_512;

      Rep = Builder.CreateIntrinsic(
          IID, {},
          {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
    }
    Rep =
        emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.sub.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_sub_ps_512;
      else
        IID = Intrinsic::x86_avx512_sub_pd_512;

      Rep = Builder.CreateIntrinsic(
          IID, {},
          {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
    }
    Rep =
        emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
  } else if ((Name.starts_with("avx512.mask.max.p") ||
              Name.starts_with("avx512.mask.min.p")) &&
             Name.drop_front(18) == ".512") {
    bool IsDouble = Name[17] == 'd';
    bool IsMin = Name[13] == 'i';
    static const Intrinsic::ID MinMaxTbl[2][2] = {
        {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
        {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
    Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];

    Rep = Builder.CreateIntrinsic(
        IID, {},
        {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
    Rep =
        emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.lzcnt.")) {
    Rep =
        Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
                                {CI->getArgOperand(0), Builder.getInt1(false)});
    Rep =
        emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
  } else if (Name.starts_with("avx512.mask.psll")) {
    bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.'   ? Name[17]
                : Name[17] == '.' ? Name[18]
                : Name[18] == '.' ? Name[19]
                                  : Name[20];

    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
        IID = Intrinsic::x86_avx2_psllv_q;
      else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
        IID = Intrinsic::x86_avx2_psllv_q_256;
      else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
        IID = Intrinsic::x86_avx2_psllv_d;
      else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
        IID = Intrinsic::x86_avx2_psllv_d_256;
      else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
        IID = Intrinsic::x86_avx512_psllv_w_128;
      else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
        IID = Intrinsic::x86_avx512_psllv_w_256;
      else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
        IID = Intrinsic::x86_avx512_psllv_w_512;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".128")) {
      if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
                          : Intrinsic::x86_sse2_psll_d;
      else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
                          : Intrinsic::x86_sse2_psll_q;
      else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
                          : Intrinsic::x86_sse2_psll_w;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".256")) {
      if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
                          : Intrinsic::x86_avx2_psll_d;
      else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
                          : Intrinsic::x86_avx2_psll_q;
      else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
                          : Intrinsic::x86_avx2_psll_w;
      else
        llvm_unreachable("Unexpected size");
    } else {
      if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_pslli_d_512
              : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
                           : Intrinsic::x86_avx512_psll_d_512;
      else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_pslli_q_512
              : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
                           : Intrinsic::x86_avx512_psll_q_512;
      else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
        IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
                          : Intrinsic::x86_avx512_psll_w_512;
      else
        llvm_unreachable("Unexpected size");
    }

    Rep = upgradeX86MaskedShift(Builder, *CI, IID);
  } else if (Name.starts_with("avx512.mask.psrl")) {
    bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.'   ? Name[17]
                : Name[17] == '.' ? Name[18]
                : Name[18] == '.' ? Name[19]
                                  : Name[20];

    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
        IID = Intrinsic::x86_avx2_psrlv_q;
      else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
        IID = Intrinsic::x86_avx2_psrlv_q_256;
      else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
        IID = Intrinsic::x86_avx2_psrlv_d;
      else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
        IID = Intrinsic::x86_avx2_psrlv_d_256;
      else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
        IID = Intrinsic::x86_avx512_psrlv_w_128;
      else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
        IID = Intrinsic::x86_avx512_psrlv_w_256;
      else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
        IID = Intrinsic::x86_avx512_psrlv_w_512;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".128")) {
      if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
                          : Intrinsic::x86_sse2_psrl_d;
      else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
                          : Intrinsic::x86_sse2_psrl_q;
      else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
                          : Intrinsic::x86_sse2_psrl_w;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".256")) {
      if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
                          : Intrinsic::x86_avx2_psrl_d;
      else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
                          : Intrinsic::x86_avx2_psrl_q;
      else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
                          : Intrinsic::x86_avx2_psrl_w;
      else
        llvm_unreachable("Unexpected size");
    } else {
      if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrli_d_512
              : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
                           : Intrinsic::x86_avx512_psrl_d_512;
      else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrli_q_512
              : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
                           : Intrinsic::x86_avx512_psrl_q_512;
      else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
        IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
                          : Intrinsic::x86_avx512_psrl_w_512;
      else
        llvm_unreachable("Unexpected size");
    }

    Rep = upgradeX86MaskedShift(Builder, *CI, IID);
  } else if (Name.starts_with("avx512.mask.psra")) {
    bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.'   ? Name[17]
                : Name[17] == '.' ? Name[18]
                : Name[18] == '.' ? Name[19]
                                  : Name[20];

    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
        IID = Intrinsic::x86_avx2_psrav_d;
      else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
        IID = Intrinsic::x86_avx2_psrav_d_256;
      else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
        IID = Intrinsic::x86_avx512_psrav_w_128;
      else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
        IID = Intrinsic::x86_avx512_psrav_w_256;
      else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
        IID = Intrinsic::x86_avx512_psrav_w_512;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".128")) {
      if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
                          : Intrinsic::x86_sse2_psra_d;
      else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_128
              : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
                           : Intrinsic::x86_avx512_psra_q_128;
      else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
                          : Intrinsic::x86_sse2_psra_w;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".256")) {
      if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
                          : Intrinsic::x86_avx2_psra_d;
      else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_256
              : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
                           : Intrinsic::x86_avx512_psra_q_256;
      else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
                          : Intrinsic::x86_avx2_psra_w;
      else
        llvm_unreachable("Unexpected size");
    } else {
      if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_d_512
              : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
                           : Intrinsic::x86_avx512_psra_d_512;
      else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_512
              : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
                           : Intrinsic::x86_avx512_psra_q_512;
      else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
        IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
                          : Intrinsic::x86_avx512_psra_w_512;
      else
        llvm_unreachable("Unexpected size");
    }

    Rep = upgradeX86MaskedShift(Builder, *CI, IID);
  } else if (Name.starts_with("avx512.mask.move.s")) {
    Rep = upgradeMaskedMove(Builder, *CI);
  } else if (Name.starts_with("avx512.cvtmask2")) {
    Rep = upgradeMaskToInt(Builder, *CI);
  } else if (Name.ends_with(".movntdqa")) {
    MDNode *Node = MDNode::get(
        C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));

    LoadInst *LI = Builder.CreateAlignedLoad(
        CI->getType(), CI->getArgOperand(0),
        Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
    LI->setMetadata(LLVMContext::MD_nontemporal, Node);
    Rep = LI;
  } else if (Name.starts_with("fma.vfmadd.") ||
             Name.starts_with("fma.vfmsub.") ||
             Name.starts_with("fma.vfnmadd.") ||
             Name.starts_with("fma.vfnmsub.")) {
    bool NegMul = Name[6] == 'n';
    bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
    bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';

    Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                    CI->getArgOperand(2)};

    if (IsScalar) {
      Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
      Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
      Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
    }

    if (NegMul && !IsScalar)
      Ops[0] = Builder.CreateFNeg(Ops[0]);
    if (NegMul && IsScalar)
      Ops[1] = Builder.CreateFNeg(Ops[1]);
    if (NegAcc)
      Ops[2] = Builder.CreateFNeg(Ops[2]);

    Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);

    if (IsScalar)
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
  } else if (Name.starts_with("fma4.vfmadd.s")) {
    Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                    CI->getArgOperand(2)};

    Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
    Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
    Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);

    Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);

    Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
                                      Rep, (uint64_t)0);
  } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
             Name.starts_with("avx512.maskz.vfmadd.s") ||
             Name.starts_with("avx512.mask3.vfmadd.s") ||
             Name.starts_with("avx512.mask3.vfmsub.s") ||
             Name.starts_with("avx512.mask3.vfnmsub.s")) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask." to make it easier.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool NegMul = Name[2] == 'n';
    bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

    Value *A = CI->getArgOperand(0);
    Value *B = CI->getArgOperand(1);
    Value *C = CI->getArgOperand(2);

    if (NegMul && (IsMask3 || IsMaskZ))
      A = Builder.CreateFNeg(A);
    if (NegMul && !(IsMask3 || IsMaskZ))
      B = Builder.CreateFNeg(B);
    if (NegAcc)
      C = Builder.CreateFNeg(C);

    A = Builder.CreateExtractElement(A, (uint64_t)0);
    B = Builder.CreateExtractElement(B, (uint64_t)0);
    C = Builder.CreateExtractElement(C, (uint64_t)0);

    if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
        cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
      Value *Ops[] = {A, B, C, CI->getArgOperand(4)};

      Intrinsic::ID IID;
      if (Name.back() == 'd')
        IID = Intrinsic::x86_avx512_vfmadd_f64;
      else
        IID = Intrinsic::x86_avx512_vfmadd_f32;
      Rep = Builder.CreateIntrinsic(IID, {}, Ops);
    } else {
      Rep = Builder.CreateIntrinsic(Intrinsic::fma, A->getType(), {A, B, C});
    }

    Value *PassThru = IsMaskZ   ? Constant::getNullValue(Rep->getType())
                      : IsMask3 ? C
                                : A;

    // For Mask3 with NegAcc, we need to create a new extractelement that
    // avoids the negation above.
    if (NegAcc && IsMask3)
      PassThru =
          Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);

    Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
    Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
                                      (uint64_t)0);
  } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
             Name.starts_with("avx512.mask.vfnmadd.p") ||
             Name.starts_with("avx512.mask.vfnmsub.p") ||
             Name.starts_with("avx512.mask3.vfmadd.p") ||
             Name.starts_with("avx512.mask3.vfmsub.p") ||
             Name.starts_with("avx512.mask3.vfnmsub.p") ||
             Name.starts_with("avx512.maskz.vfmadd.p")) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask." to make it easier.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool NegMul = Name[2] == 'n';
    bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

    Value *A = CI->getArgOperand(0);
    Value *B = CI->getArgOperand(1);
    Value *C = CI->getArgOperand(2);

    if (NegMul && (IsMask3 || IsMaskZ))
      A = Builder.CreateFNeg(A);
    if (NegMul && !(IsMask3 || IsMaskZ))
      B = Builder.CreateFNeg(B);
    if (NegAcc)
      C = Builder.CreateFNeg(C);

    if (CI->arg_size() == 5 &&
        (!isa<ConstantInt>(CI->getArgOperand(4)) ||
         cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
      Intrinsic::ID IID;
      // Check the character before ".512" in string.
      if (Name[Name.size() - 5] == 's')
        IID = Intrinsic::x86_avx512_vfmadd_ps_512;
      else
        IID = Intrinsic::x86_avx512_vfmadd_pd_512;

      Rep = Builder.CreateIntrinsic(IID, {}, {A, B, C, CI->getArgOperand(4)});
    } else {
      Rep = Builder.CreateIntrinsic(Intrinsic::fma, A->getType(), {A, B, C});
    }

    Value *PassThru = IsMaskZ   ? llvm::Constant::getNullValue(CI->getType())
                      : IsMask3 ? CI->getArgOperand(2)
                                : CI->getArgOperand(0);

    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  } else if (Name.starts_with("fma.vfmsubadd.p")) {
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    unsigned EltWidth = CI->getType()->getScalarSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_fma_vfmaddsub_ps;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_fma_vfmaddsub_pd;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                    CI->getArgOperand(2)};
    Ops[2] = Builder.CreateFNeg(Ops[2]);
    Rep = Builder.CreateIntrinsic(IID, {}, Ops);
  } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
             Name.starts_with("avx512.mask3.vfmaddsub.p") ||
             Name.starts_with("avx512.maskz.vfmaddsub.p") ||
             Name.starts_with("avx512.mask3.vfmsubadd.p")) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask." to make it easier.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool IsSubAdd = Name[3] == 's';
    if (CI->arg_size() == 5) {
      Intrinsic::ID IID;
      // Check the character before ".512" in string.
      if (Name[Name.size() - 5] == 's')
        IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
      else
        IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;

      Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                      CI->getArgOperand(2), CI->getArgOperand(4)};
      if (IsSubAdd)
        Ops[2] = Builder.CreateFNeg(Ops[2]);

      Rep = Builder.CreateIntrinsic(IID, {}, Ops);
    } else {
      int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

      Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                      CI->getArgOperand(2)};

      Function *FMA = Intrinsic::getOrInsertDeclaration(
          CI->getModule(), Intrinsic::fma, Ops[0]->getType());
      Value *Odd = Builder.CreateCall(FMA, Ops);
      Ops[2] = Builder.CreateFNeg(Ops[2]);
      Value *Even = Builder.CreateCall(FMA, Ops);

      if (IsSubAdd)
        std::swap(Even, Odd);

      SmallVector<int, 32> Idxs(NumElts);
      for (int i = 0; i != NumElts; ++i)
        Idxs[i] = i + (i % 2) * NumElts;

      Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
    }

    Value *PassThru = IsMaskZ   ? llvm::Constant::getNullValue(CI->getType())
                      : IsMask3 ? CI->getArgOperand(2)
                                : CI->getArgOperand(0);

    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  } else if (Name.starts_with("avx512.mask.pternlog.") ||
             Name.starts_with("avx512.maskz.pternlog.")) {
    bool ZeroMask = Name[11] == 'z';
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    unsigned EltWidth = CI->getType()->getScalarSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2), CI->getArgOperand(3)};
    Rep = Builder.CreateIntrinsic(IID, {}, Args);
    Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                               : CI->getArgOperand(0);
    Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
  } else if (Name.starts_with("avx512.mask.vpmadd52") ||
             Name.starts_with("avx512.maskz.vpmadd52")) {
    bool ZeroMask = Name[11] == 'z';
    bool High = Name[20] == 'h' || Name[21] == 'h';
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
    else if (VecWidth == 256 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
    else if (VecWidth == 512 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
    else if (VecWidth == 128 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
    else if (VecWidth == 256 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
    else if (VecWidth == 512 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2)};
    Rep = Builder.CreateIntrinsic(IID, {}, Args);
    Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                               : CI->getArgOperand(0);
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
             Name.starts_with("avx512.mask.vpermt2var.") ||
             Name.starts_with("avx512.maskz.vpermt2var.")) {
    bool ZeroMask = Name[11] == 'z';
    bool IndexForm = Name[17] == 'i';
    Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
  } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
             Name.starts_with("avx512.maskz.vpdpbusd.") ||
             Name.starts_with("avx512.mask.vpdpbusds.") ||
             Name.starts_with("avx512.maskz.vpdpbusds.")) {
    bool ZeroMask = Name[11] == 'z';
    bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_128;
    else if (VecWidth == 256 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_256;
    else if (VecWidth == 512 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_512;
    else if (VecWidth == 128 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_128;
    else if (VecWidth == 256 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_256;
    else if (VecWidth == 512 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2)};
    Rep = Builder.CreateIntrinsic(IID, {}, Args);
    Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                               : CI->getArgOperand(0);
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
             Name.starts_with("avx512.maskz.vpdpwssd.") ||
             Name.starts_with("avx512.mask.vpdpwssds.") ||
             Name.starts_with("avx512.maskz.vpdpwssds.")) {
    bool ZeroMask = Name[11] == 'z';
    bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_128;
    else if (VecWidth == 256 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_256;
    else if (VecWidth == 512 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_512;
    else if (VecWidth == 128 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_128;
    else if (VecWidth == 256 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_256;
    else if (VecWidth == 512 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2)};
    Rep = Builder.CreateIntrinsic(IID, {}, Args);
    Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                               : CI->getArgOperand(0);
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
             Name == "addcarry.u32" || Name == "addcarry.u64" ||
             Name == "subborrow.u32" || Name == "subborrow.u64") {
    Intrinsic::ID IID;
    if (Name[0] == 'a' && Name.back() == '2')
      IID = Intrinsic::x86_addcarry_32;
    else if (Name[0] == 'a' && Name.back() == '4')
      IID = Intrinsic::x86_addcarry_64;
    else if (Name[0] == 's' && Name.back() == '2')
      IID = Intrinsic::x86_subborrow_32;
    else if (Name[0] == 's' && Name.back() == '4')
      IID = Intrinsic::x86_subborrow_64;
    else
      llvm_unreachable("Unexpected intrinsic");

    // Make a call with 3 operands.
    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2)};
    Value *NewCall = Builder.CreateIntrinsic(IID, {}, Args);

    // Extract the second result and store it.
    Value *Data = Builder.CreateExtractValue(NewCall, 1);
    Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
    // Replace the original call result with the first result of the new call.
    Value *CF = Builder.CreateExtractValue(NewCall, 0);

    CI->replaceAllUsesWith(CF);
    Rep = nullptr;
  } else if (Name.starts_with("avx512.mask.") &&
             upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
    // Rep will be updated by the call in the condition.
  }

  return Rep;
}
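// Upgrade AArch64-specific intrinsic calls: the NEON bfloat conversion
// intrinsics that are now expressed as fptrunc/shuffle sequences, and the SVE
// bf16 convert intrinsics that used a predicate of the wrong element count.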
static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
                                          Function *F, IRBuilder<> &Builder) {
  if (Name.starts_with("neon.bfcvt")) {
    if (Name.starts_with("neon.bfcvtn2")) {
      SmallVector<int, 32> LoMask(4);
      std::iota(LoMask.begin(), LoMask.end(), 0);
      SmallVector<int, 32> ConcatMask(8);
      std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
      Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
      Value *Trunc =
          Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
      return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
    } else if (Name.starts_with("neon.bfcvtn")) {
      SmallVector<int, 32> ConcatMask(8);
      std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
      Type *V4BF16 =
          FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
      Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
      return Builder.CreateShuffleVector(
          Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
    } else {
      return Builder.CreateFPTrunc(CI->getOperand(0),
                                   Type::getBFloatTy(F->getContext()));
    }
  } else if (Name.starts_with("sve.fcvt")) {
    Intrinsic::ID NewID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
            .Case("sve.fcvtnt.bf16f32",
                  Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
            .Default(Intrinsic::not_intrinsic);
    if (NewID == Intrinsic::not_intrinsic)
      llvm_unreachable("Unhandled Intrinsic!");

    SmallVector<Value *, 3> Args(CI->args());

    // The original intrinsics incorrectly used a predicate based on the
    // smallest element type rather than the largest.
    Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
    Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);

    if (Args[1]->getType() != BadPredTy)
      llvm_unreachable("Unexpected predicate type!");

    Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
                                      BadPredTy, Args[1]);
    Args[1] = Builder.CreateIntrinsic(
        Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);

    return Builder.CreateIntrinsic(NewID, {}, Args, /*FMFSource=*/nullptr,
                                   CI->getName());
  }

  llvm_unreachable("Unhandled Intrinsic!");
}
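// Upgrade ARM intrinsic calls: the old v4i1-predicated MVE/CDE intrinsics
// (and mve.vctp64.old) now take v2i1 predicates, so insert the appropriate
// predicate conversions around the new call.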
static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
                                      IRBuilder<> &Builder) {
  if (Name == "mve.vctp64.old") {
    // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
    // correct type.
    Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
                                          CI->getArgOperand(0),
                                          /*FMFSource=*/nullptr, CI->getName());
    Value *C1 = Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_v2i,
        {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
    return Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_i2v,
        {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
  } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
             Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
    std::vector<Type *> Tys;
    unsigned ID = CI->getIntrinsicID();
    Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
    switch (ID) {
    case Intrinsic::arm_mve_mull_int_predicated:
    case Intrinsic::arm_mve_vqdmull_predicated:
    case Intrinsic::arm_mve_vldr_gather_base_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
             V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_offset_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(),
             CI->getOperand(1)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
             CI->getOperand(2)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_cde_vcx1q_predicated:
    case Intrinsic::arm_cde_vcx1qa_predicated:
    case Intrinsic::arm_cde_vcx2q_predicated:
    case Intrinsic::arm_cde_vcx2qa_predicated:
    case Intrinsic::arm_cde_vcx3q_predicated:
    case Intrinsic::arm_cde_vcx3qa_predicated:
      Tys = {CI->getOperand(1)->getType(), V2I1Ty};
      break;
    default:
      llvm_unreachable("Unhandled Intrinsic!");
    }

    std::vector<Value *> Ops;
    for (Value *Op : CI->args()) {
      Type *Ty = Op->getType();
      if (Ty->getScalarSizeInBits() == 1) {
        Value *C1 = Builder.CreateIntrinsic(
            Intrinsic::arm_mve_pred_v2i,
            {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
        Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
      }
      Ops.push_back(Op);
    }

    return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
                                   CI->getName());
  }
  llvm_unreachable("Unknown function for ARM CallBase upgrade.");
}
// These are expected to have the arguments:
// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
//
// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
                                         Function *F, IRBuilder<> &Builder) {
  AtomicRMWInst::BinOp RMWOp =
      StringSwitch<AtomicRMWInst::BinOp>(Name)
          .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
          .StartsWith("ds.fmin", AtomicRMWInst::FMin)
          .StartsWith("ds.fmax", AtomicRMWInst::FMax)
          .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
          .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
          .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
          .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
          .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
          .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
          .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
          .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax);

  unsigned NumOperands = CI->getNumOperands();
  if (NumOperands < 3) // Malformed bitcode.
    return nullptr;

  Value *Ptr = CI->getArgOperand(0);
  PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
  if (!PtrTy) // Malformed.
    return nullptr;

  Value *Val = CI->getArgOperand(1);
  if (Val->getType() != CI->getType()) // Malformed.
    return nullptr;

  ConstantInt *OrderArg = nullptr;
  bool IsVolatile = false;

  // These should have 5 arguments (plus the callee). A separate version of the
  // ds_fadd intrinsic was defined for bf16 which was missing arguments.
  if (NumOperands > 3)
    OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));

  // Ignore scope argument at 3

  if (NumOperands > 5) {
    ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
    IsVolatile = !VolatileArg || !VolatileArg->isZero();
  }

  AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
  if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
    Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
  if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
    Order = AtomicOrdering::SequentiallyConsistent;

  LLVMContext &Ctx = F->getContext();

  // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
  Type *RetTy = CI->getType();
  if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
    if (VT->getElementType()->isIntegerTy(16)) {
      VectorType *AsBF16 =
          VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
      Val = Builder.CreateBitCast(Val, AsBF16);
    }
  }

  // The scope argument never really worked correctly. Use agent as the most
  // conservative option which should still always produce the instruction.
  SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
  AtomicRMWInst *RMW =
      Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);

  unsigned AddrSpace = PtrTy->getAddressSpace();
  if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
    MDNode *EmptyMD = MDNode::get(F->getContext(), {});
    RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
    if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
      RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
  }

  if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
    MDBuilder MDB(F->getContext());
    MDNode *RangeNotPrivate =
        MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
                        APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
    RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
  }

  if (IsVolatile)
    RMW->setVolatile(true);

  return Builder.CreateBitCast(RMW, RetTy);
}
/// Helper to unwrap intrinsic call MetadataAsValue operands.
template <typename MDType>
static MDType *unwrapMAVOp(CallBase *CI, unsigned Op) {
  if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
    return dyn_cast<MDType>(MAV->getMetadata());
  return nullptr;
}
/// Convert debug intrinsic calls to non-instruction debug records.
/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
/// \p CI - The debug intrinsic call.
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
  DbgRecord *DR = nullptr;
  if (Name == "label") {
    DR = new DbgLabelRecord(unwrapMAVOp<DILabel>(CI, 0), CI->getDebugLoc());
  } else if (Name == "assign") {
    DR = new DbgVariableRecord(
        unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
        unwrapMAVOp<DIExpression>(CI, 2), unwrapMAVOp<DIAssignID>(CI, 3),
        unwrapMAVOp<Metadata>(CI, 4), unwrapMAVOp<DIExpression>(CI, 5),
        CI->getDebugLoc());
  } else if (Name == "declare") {
    DR = new DbgVariableRecord(
        unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
        unwrapMAVOp<DIExpression>(CI, 2), CI->getDebugLoc(),
        DbgVariableRecord::LocationType::Declare);
  } else if (Name == "addr") {
    // Upgrade dbg.addr to dbg.value with DW_OP_deref.
    DIExpression *Expr = unwrapMAVOp<DIExpression>(CI, 2);
    Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
    DR = new DbgVariableRecord(unwrapMAVOp<Metadata>(CI, 0),
                               unwrapMAVOp<DILocalVariable>(CI, 1), Expr,
                               CI->getDebugLoc());
  } else if (Name == "value") {
    // An old version of dbg.value had an extra offset argument.
    unsigned VarOp = 1;
    unsigned ExprOp = 2;
    if (CI->arg_size() == 4) {
      auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
      // Nonzero offset dbg.values get dropped without a replacement.
      if (!Offset || !Offset->isZeroValue())
        return;
      VarOp = 2;
      ExprOp = 3;
    }
    DR = new DbgVariableRecord(
        unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, VarOp),
        unwrapMAVOp<DIExpression>(CI, ExprOp), CI->getDebugLoc());
  }
  assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
  CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
}
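// Illustrative sketch (assumed example, not taken from the original source):
// with the new debug-info format enabled, a call such as
//   call void @llvm.dbg.value(metadata i32 %x, metadata !12, metadata !DIExpression()), !dbg !15
// is replaced by a non-instruction record attached before the call's position:
//   #dbg_value(i32 %x, !12, !DIExpression(), !15)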
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
  // Note dyn_cast to Function is not quite the same as getCalledFunction, which
  // checks the callee's function type matches. It's likely we need to handle
  // type changes here.
  Function *F = dyn_cast<Function>(CI->getCalledOperand());
  if (!F)
    return;

  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());

  if (!NewFn) {
    bool FallthroughToDefaultUpgrade = false;
    // Get the Function's name.
    StringRef Name = F->getName();

    assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
    Name = Name.substr(5);

    bool IsX86 = Name.consume_front("x86.");
    bool IsNVVM = Name.consume_front("nvvm.");
    bool IsAArch64 = Name.consume_front("aarch64.");
    bool IsARM = Name.consume_front("arm.");
    bool IsAMDGCN = Name.consume_front("amdgcn.");
    bool IsDbg = Name.consume_front("dbg.");
    Value *Rep = nullptr;

    if (!IsX86 && Name == "stackprotectorcheck") {
      Rep = nullptr;
    } else if (IsNVVM) {
      Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
    } else if (IsX86) {
      Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
    } else if (IsAArch64) {
      Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
    } else if (IsARM) {
      Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
    } else if (IsAMDGCN) {
      Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
    } else if (IsDbg) {
      // We might have decided we don't want the new format after all between
      // first requesting the upgrade and now; skip the conversion if that is
      // the case, and check here to see if the intrinsic needs to be upgraded
      // normally.
      if (!CI->getModule()->IsNewDbgInfoFormat) {
        upgradeIntrinsicFunction1(CI->getCalledFunction(), NewFn, false);
        FallthroughToDefaultUpgrade = true;
      } else {
        upgradeDbgIntrinsicToDbgRecord(Name, CI);
      }
    } else {
      llvm_unreachable("Unknown function for CallBase upgrade.");
    }

    if (!FallthroughToDefaultUpgrade) {
      if (Rep)
        CI->replaceAllUsesWith(Rep);
      CI->eraseFromParent();
      return;
    }
  }
  const auto &DefaultCase = [&]() -> void {
    if (CI->getFunctionType() == NewFn->getFunctionType()) {
      // Handle generic mangling change.
      assert(
          (CI->getCalledFunction()->getName() != NewFn->getName()) &&
          "Unknown function for CallBase upgrade and isn't just a name change");
      CI->setCalledFunction(NewFn);
      return;
    }

    // This must be an upgrade from a named to a literal struct.
    if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
      assert(OldST != NewFn->getReturnType() &&
             "Return type must have changed");
      assert(OldST->getNumElements() ==
                 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
             "Must have same number of elements");

      SmallVector<Value *> Args(CI->args());
      CallInst *NewCI = Builder.CreateCall(NewFn, Args);
      NewCI->setAttributes(CI->getAttributes());
      Value *Res = PoisonValue::get(OldST);
      for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
        Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
        Res = Builder.CreateInsertValue(Res, Elem, Idx);
      }
      CI->replaceAllUsesWith(Res);
      CI->eraseFromParent();
      return;
    }

    // We're probably about to produce something invalid. Let the verifier catch
    // it instead of dying here.
    CI->setCalledOperand(
        ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
  };
  CallInst *NewCall = nullptr;
  switch (NewFn->getIntrinsicID()) {
  default: {
    DefaultCase();
    return;
  }
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    SmallVector<Value *, 4> Args(CI->args());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
  case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
  case Intrinsic::aarch64_sve_bfdot_lane_v2: {
    LLVMContext &Ctx = F->getParent()->getContext();
    SmallVector<Value *, 4> Args(CI->args());
    Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
                               cast<ConstantInt>(Args[3])->getZExtValue());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld2_sret: {
    StringRef Name = F->getName();
    Name = Name.substr(5);
    unsigned N = StringSwitch<unsigned>(Name)
                     .StartsWith("aarch64.sve.ld2", 2)
                     .StartsWith("aarch64.sve.ld3", 3)
                     .StartsWith("aarch64.sve.ld4", 4)
                     .Default(0);
    auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
    unsigned MinElts = RetTy->getMinNumElements() / N;
    SmallVector<Value *, 2> Args(CI->args());
    Value *NewLdCall = Builder.CreateCall(NewFn, Args);
    Value *Ret = llvm::PoisonValue::get(RetTy);
    for (unsigned I = 0; I < N; I++) {
      Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
      Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
      Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
    }
    NewCall = dyn_cast<CallInst>(Ret);
    break;
  }
  case Intrinsic::coro_end: {
    SmallVector<Value *, 3> Args(CI->args());
    Args.push_back(ConstantTokenNone::get(CI->getContext()));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::vector_extract: {
    StringRef Name = F->getName();
    Name = Name.substr(5); // Strip llvm.
    if (!Name.starts_with("aarch64.sve.tuple.get")) {
      DefaultCase();
      return;
    }
    auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
    unsigned MinElts = RetTy->getMinNumElements();
    unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
    break;
  }

  case Intrinsic::vector_insert: {
    StringRef Name = F->getName();
    Name = Name.substr(5);
    if (!Name.starts_with("aarch64.sve.tuple")) {
      DefaultCase();
      return;
    }
    if (Name.starts_with("aarch64.sve.tuple.set")) {
      unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
      Value *NewIdx =
          ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
      NewCall = Builder.CreateCall(
          NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
      break;
    }
    if (Name.starts_with("aarch64.sve.tuple.create")) {
      unsigned N = StringSwitch<unsigned>(Name)
                       .StartsWith("aarch64.sve.tuple.create2", 2)
                       .StartsWith("aarch64.sve.tuple.create3", 3)
                       .StartsWith("aarch64.sve.tuple.create4", 4)
                       .Default(0);
      assert(N > 1 && "Create is expected to be between 2-4");
      auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
      Value *Ret = llvm::PoisonValue::get(RetTy);
      unsigned MinElts = RetTy->getMinNumElements() / N;
      for (unsigned I = 0; I < N; I++) {
        Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
        Value *V = CI->getArgOperand(I);
        Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
      }
      NewCall = dyn_cast<CallInst>(Ret);
    }
    break;
  }
  case Intrinsic::arm_neon_bfdot:
  case Intrinsic::arm_neon_bfmmla:
  case Intrinsic::arm_neon_bfmlalb:
  case Intrinsic::arm_neon_bfmlalt:
  case Intrinsic::aarch64_neon_bfdot:
  case Intrinsic::aarch64_neon_bfmmla:
  case Intrinsic::aarch64_neon_bfmlalb:
  case Intrinsic::aarch64_neon_bfmlalt: {
    SmallVector<Value *, 3> Args;
    assert(CI->arg_size() == 3 &&
           "Mismatch between function args and call args");
    size_t OperandWidth =
        CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
    assert((OperandWidth == 64 || OperandWidth == 128) &&
           "Unexpected operand width");
    Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
    auto Iter = CI->args().begin();
    Args.push_back(*Iter++);
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::bitreverse:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    assert(CI->arg_size() == 1 &&
           "Mismatch between function args and call args");
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
    break;

  case Intrinsic::objectsize: {
    Value *NullIsUnknownSize =
        CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
    Value *Dynamic =
        CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
    NewCall = Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize,
                Dynamic});
    break;
  }

  case Intrinsic::ctpop:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::convert_from_fp16:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::dbg_value: {
    StringRef Name = F->getName();
    Name = Name.substr(5); // Strip llvm.
    // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
    if (Name.starts_with("dbg.addr")) {
      DIExpression *Expr = cast<DIExpression>(
          cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
      Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
      NewCall =
          Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
                                     MetadataAsValue::get(C, Expr)});
      break;
    }

    // Upgrade from the old version that had an extra offset argument.
    assert(CI->arg_size() == 4);
    // Drop nonzero offsets instead of attempting to upgrade them.
    if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
      if (Offset->isZeroValue()) {
        NewCall = Builder.CreateCall(
            NewFn,
            {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
        break;
      }
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::ptr_annotation:
    // Upgrade from versions that lacked the annotation attribute argument.
    if (CI->arg_size() != 4) {
      DefaultCase();
      return;
    }

    // Create a new call with an added null annotation attribute argument.
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(2), CI->getArgOperand(3),
                                   Constant::getNullValue(Builder.getPtrTy())});
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;

  case Intrinsic::var_annotation:
    // Upgrade from versions that lacked the annotation attribute argument.
    if (CI->arg_size() != 4) {
      DefaultCase();
      return;
    }
    // Create a new call with an added null annotation attribute argument.
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(2), CI->getArgOperand(3),
                                   Constant::getNullValue(Builder.getPtrTy())});
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;

  case Intrinsic::riscv_aes32dsi:
  case Intrinsic::riscv_aes32dsmi:
  case Intrinsic::riscv_aes32esi:
  case Intrinsic::riscv_aes32esmi:
  case Intrinsic::riscv_sm4ks:
  case Intrinsic::riscv_sm4ed: {
    // The last argument to these intrinsics used to be i8 and changed to i32.
    // The type overload for sm4ks and sm4ed was removed.
    Value *Arg2 = CI->getArgOperand(2);
    if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
      return;

    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    if (CI->getType()->isIntegerTy(64)) {
      Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
      Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
    }

    Arg2 = ConstantInt::get(Type::getInt32Ty(C),
                            cast<ConstantInt>(Arg2)->getZExtValue());

    NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
    Value *Res = NewCall;
    if (Res->getType() != CI->getType())
      Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::riscv_sha256sig0:
  case Intrinsic::riscv_sha256sig1:
  case Intrinsic::riscv_sha256sum0:
  case Intrinsic::riscv_sha256sum1:
  case Intrinsic::riscv_sm3p0:
  case Intrinsic::riscv_sm3p1: {
    // The last argument to these intrinsics used to be i8 and changed to i32.
    // The type overload for sm4ks and sm4ed was removed.
    if (!CI->getType()->isIntegerTy(64))
      return;

    Value *Arg =
        Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());

    NewCall = Builder.CreateCall(NewFn, Arg);
    Value *Res =
        Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
    break;

  case Intrinsic::x86_xop_vpermil2pd:
  case Intrinsic::x86_xop_vpermil2ps:
  case Intrinsic::x86_xop_vpermil2pd_256:
  case Intrinsic::x86_xop_vpermil2ps_256: {
    SmallVector<Value *, 4> Args(CI->args());
    VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
    VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
      return;

    // Old intrinsic, add bitcasts.
    Value *Arg1 = CI->getArgOperand(1);

    auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);

    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

    NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
    break;
  }

  case Intrinsic::x86_rdtscp: {
    // This used to take 1 argument. If we have no arguments, it is already
    // upgraded.
    if (CI->getNumOperands() == 0)
      return;

    NewCall = Builder.CreateCall(NewFn);
    // Extract the second result and store it.
    Value *Data = Builder.CreateExtractValue(NewCall, 1);
    Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
    // Replace the original call result with the first result of the new call.
    Value *TSC = Builder.CreateExtractValue(NewCall, 0);

    NewCall->takeName(CI);
    CI->replaceAllUsesWith(TSC);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument models
    // an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->args());

    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::x86_avx512_mask_cmp_pd_128:
  case Intrinsic::x86_avx512_mask_cmp_pd_256:
  case Intrinsic::x86_avx512_mask_cmp_pd_512:
  case Intrinsic::x86_avx512_mask_cmp_ps_128:
  case Intrinsic::x86_avx512_mask_cmp_ps_256:
  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts =
        cast<FixedVectorType>(Args[0]->getType())->getNumElements();
    Args[3] = getX86MaskVec(Builder, Args[3], NumElts);

    NewCall = Builder.CreateCall(NewFn, Args);
    Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);

    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
  case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
  case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
  case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    if (NewFn->getIntrinsicID() ==
        Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
      Args[1] = Builder.CreateBitCast(
          Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));

    NewCall = Builder.CreateCall(NewFn, Args);
    Value *Res = Builder.CreateBitCast(
        NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));

    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::x86_avx512bf16_dpbf16ps_128:
  case Intrinsic::x86_avx512bf16_dpbf16ps_256:
  case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts =
        cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
    Args[1] = Builder.CreateBitCast(
        Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
    Args[2] = Builder.CreateBitCast(
        Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));

    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::thread_pointer: {
    NewCall = Builder.CreateCall(NewFn, {});
    break;
  }

  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset: {
    // We have to make sure that the call signature is what we're expecting.
    // We only want to change the old signatures by removing the alignment arg:
    //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
    //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
    //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
    //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
    // Note: i8*'s in the above can be any pointer type
    if (CI->arg_size() != 5) {
      DefaultCase();
      return;
    }
    // Remove alignment argument (3), and add alignment attributes to the
    // dest/src pointers.
    Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
                      CI->getArgOperand(2), CI->getArgOperand(4)};
    NewCall = Builder.CreateCall(NewFn, Args);
    AttributeList OldAttrs = CI->getAttributes();
    AttributeList NewAttrs = AttributeList::get(
        C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
        {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
         OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
    NewCall->setAttributes(NewAttrs);
    auto *MemCI = cast<MemIntrinsic>(NewCall);
    // All mem intrinsics support dest alignment.
    const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
    MemCI->setDestAlignment(Align->getMaybeAlignValue());
    // Memcpy/Memmove also support source alignment.
    if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
      MTI->setSourceAlignment(Align->getMaybeAlignValue());
    break;
  }
  }
  assert(NewCall && "Should have either set this variable or returned through "
                    "the default case");
  NewCall->takeName(CI);
  CI->replaceAllUsesWith(NewCall);
  CI->eraseFromParent();
}
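// Illustrative sketch (assumed example, not taken from the original source):
// the memcpy/memmove/memset arm above turns the old five-argument form, e.g.
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %n, i32 4, i1 false)
// into the four-argument form with the alignment expressed as attributes:
//   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr align 4 %src, i64 %n, i1 false)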
void llvm::UpgradeCallsToIntrinsic(Function *F) {
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");

  // Check if this function should be upgraded and get the replacement function
  // if there is one.
  Function *NewFn;
  if (UpgradeIntrinsicFunction(F, NewFn)) {
    // Replace all users of the old function with the new function or new
    // instructions. This is not a range loop because the call is deleted.
    for (User *U : make_early_inc_range(F->users()))
      if (CallBase *CB = dyn_cast<CallBase>(U))
        UpgradeIntrinsicCall(CB, NewFn);

    // Remove old function, no longer used, from the module.
    F->eraseFromParent();
  }
}
MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
  const unsigned NumOperands = MD.getNumOperands();
  if (NumOperands == 0)
    return &MD; // Invalid, punt to a verifier error.

  // Check if the tag uses struct-path aware TBAA format.
  if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
    return &MD;

  auto &Context = MD.getContext();
  if (NumOperands == 3) {
    Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
    MDNode *ScalarType = MDNode::get(Context, Elts);
    // Create a MDNode <ScalarType, ScalarType, offset 0, const>
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(
                             Constant::getNullValue(Type::getInt64Ty(Context))),
                         MD.getOperand(2)};
    return MDNode::get(Context, Elts2);
  }

  // Create a MDNode <MD, MD, offset 0>
  Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
                                    Type::getInt64Ty(Context)))};
  return MDNode::get(Context, Elts);
}
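// Illustrative sketch (assumed example, not taken from the original source):
// an old scalar TBAA access tag such as
//   !1 = !{!"int", !0}
// is wrapped into the struct-path aware form
//   !{!1, !1, i64 0}
// so that later passes only ever see the new format.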
Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
                                      Instruction *&Temp) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Temp = nullptr;
  Type *SrcTy = V->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = V->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);
    Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);

    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
  }

  return nullptr;
}
Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Type *SrcTy = C->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = C->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);
    return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
                                     DestTy);
  }

  return nullptr;
}
/// Check the debug info version number, if it is out-dated, drop the debug
/// info. Return true if module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  if (DisableAutoUpgradeDebugInfo)
    return false;

  // We need to get metadata before the module is verified (i.e., getModuleFlag
  // makes assumptions that we haven't verified yet). Carefully extract the flag
  // from the metadata.
  unsigned Version = 0;
  if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
    auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
      if (Flag->getNumOperands() < 3)
        return false;
      if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
        return K->getString() == "Debug Info Version";
      return false;
    });
    if (OpIt != ModFlags->op_end()) {
      const MDOperand &ValOp = (*OpIt)->getOperand(2);
      if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
        Version = CI->getZExtValue();
    }
  }

  if (Version == DEBUG_METADATA_VERSION) {
    bool BrokenDebugInfo = false;
    if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
      report_fatal_error("Broken module found, compilation aborted!");
    if (!BrokenDebugInfo)
      // Everything is ok.
      return false;

    // Diagnose malformed debug info.
    DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
    M.getContext().diagnose(Diag);
  }

  bool Modified = StripDebugInfo(M);
  if (Modified && Version != DEBUG_METADATA_VERSION) {
    // Diagnose a version mismatch.
    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DiagVersion);
  }

  return Modified;
}
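// Illustrative sketch (assumed example, not taken from the original source):
// a module carrying the up-to-date flag
//   !llvm.module.flags = !{!0}
//   !0 = !{i32 2, !"Debug Info Version", i32 3}
// passes through unchanged when the metadata also verifies; any other version
// causes the debug info to be stripped and a mismatch diagnostic to be emitted.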
bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
                                        const Metadata *V) {
  if (K == "kernel") {
    if (!mdconst::extract<ConstantInt>(V)->isZero())
      cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
    return true;
  }
  if (K == "align") {
    // V is a bitfield specifying two 16-bit values. The alignment value is
    // specified in the low 16 bits, the index in the high bits. For the
    // index, 0 indicates the return value while higher values correspond to
    // each parameter (idx = param + 1).
    const uint64_t AlignIdxValuePair =
        mdconst::extract<ConstantInt>(V)->getZExtValue();
    const unsigned Idx = (AlignIdxValuePair >> 16);
    const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
    // TODO: Skip adding the stackalign attribute for returns, for now.
    if (!Idx)
      return false;
    cast<Function>(GV)->addAttributeAtIndex(
        Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
    return true;
  }

  return false;
}
void llvm::UpgradeNVVMAnnotations(Module &M) {
  NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
  if (!NamedMD)
    return;

  SmallVector<MDNode *, 8> NewNodes;
  SmallSet<const MDNode *, 8> SeenNodes;
  for (MDNode *MD : NamedMD->operands()) {
    if (!SeenNodes.insert(MD).second)
      continue;

    auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
    if (!GV)
      continue;

    assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");

    SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
    // Each nvvm.annotations metadata entry will be of the following form:
    //   !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
    // start index = 1, to skip the global variable key
    // increment = 2, to skip the value for each property-value pairs
    for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
      MDString *K = cast<MDString>(MD->getOperand(j));
      const MDOperand &V = MD->getOperand(j + 1);
      bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
      if (!Upgraded)
        NewOperands.append({K, V});
    }

    if (NewOperands.size() > 1)
      NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
  }

  NamedMD->clearOperands();
  for (MDNode *N : NewNodes)
    NamedMD->addOperand(N);
}
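// Illustrative sketch (assumed example, not taken from the original source):
// an entry such as
//   !nvvm.annotations = !{!0}
//   !0 = !{ptr @kern, !"kernel", i32 1}
// causes @kern to be given the ptx_kernel calling convention, and the now
// redundant "kernel" key/value pair is dropped from the rebuilt annotation node.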
/// This checks for objc retain release marker which should be upgraded. It
/// returns true if module is modified.
static bool upgradeRetainReleaseMarker(Module &M) {
  bool Changed = false;
  const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
  NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
  if (ModRetainReleaseMarker) {
    MDNode *Op = ModRetainReleaseMarker->getOperand(0);
    if (Op) {
      MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
      if (ID) {
        SmallVector<StringRef, 4> ValueComp;
        ID->getString().split(ValueComp, "#");
        if (ValueComp.size() == 2) {
          std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
          ID = MDString::get(M.getContext(), NewValue);
        }
        M.addModuleFlag(Module::Error, MarkerKey, ID);
        M.eraseNamedMetadata(ModRetainReleaseMarker);
        Changed = true;
      }
    }
  }
  return Changed;
}
void llvm::UpgradeARCRuntime(Module &M) {
  // This lambda converts normal function calls to ARC runtime functions to
  // intrinsic calls.
  auto UpgradeToIntrinsic = [&](const char *OldFunc,
                                llvm::Intrinsic::ID IntrinsicFunc) {
    Function *Fn = M.getFunction(OldFunc);
    if (!Fn)
      return;

    Function *NewFn =
        llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);

    for (User *U : make_early_inc_range(Fn->users())) {
      CallInst *CI = dyn_cast<CallInst>(U);
      if (!CI || CI->getCalledFunction() != Fn)
        continue;

      IRBuilder<> Builder(CI->getParent(), CI->getIterator());
      FunctionType *NewFuncTy = NewFn->getFunctionType();
      SmallVector<Value *, 2> Args;

      // Don't upgrade the intrinsic if it's not valid to bitcast the return
      // value to the return type of the old function.
      if (NewFuncTy->getReturnType() != CI->getType() &&
          !CastInst::castIsValid(Instruction::BitCast, CI,
                                 NewFuncTy->getReturnType()))
        continue;

      bool InvalidCast = false;

      for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
        Value *Arg = CI->getArgOperand(I);

        // Bitcast argument to the parameter type of the new function if it's
        // not a variadic argument.
        if (I < NewFuncTy->getNumParams()) {
          // Don't upgrade the intrinsic if it's not valid to bitcast the
          // argument to the parameter type of the new function.
          if (!CastInst::castIsValid(Instruction::BitCast, Arg,
                                     NewFuncTy->getParamType(I))) {
            InvalidCast = true;
            break;
          }
          Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
        }
        Args.push_back(Arg);
      }

      if (InvalidCast)
        continue;

      // Create a call instruction that calls the new function.
      CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
      NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
      NewCall->takeName(CI);

      // Bitcast the return value back to the type of the old call.
      Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());

      if (!CI->use_empty())
        CI->replaceAllUsesWith(NewRetVal);
      CI->eraseFromParent();
    }

    if (Fn->use_empty())
      Fn->eraseFromParent();
  };

  // Unconditionally convert a call to "clang.arc.use" to a call to
  // "llvm.objc.clang.arc.use".
  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);

  // Upgrade the retain release marker. If there is no need to upgrade
  // the marker, that means either the module is already new enough to contain
  // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
  if (!upgradeRetainReleaseMarker(M))
    return;

  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
      {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
      {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
      {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
      {"objc_autoreleaseReturnValue",
       llvm::Intrinsic::objc_autoreleaseReturnValue},
      {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
      {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
      {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
      {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
      {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
      {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
      {"objc_release", llvm::Intrinsic::objc_release},
      {"objc_retain", llvm::Intrinsic::objc_retain},
      {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
      {"objc_retainAutoreleaseReturnValue",
       llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
      {"objc_retainAutoreleasedReturnValue",
       llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
      {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
      {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
      {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
      {"objc_unsafeClaimAutoreleasedReturnValue",
       llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
      {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
      {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
      {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
      {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
      {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
      {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
      {"objc_arc_annotation_topdown_bbstart",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
      {"objc_arc_annotation_topdown_bbend",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
      {"objc_arc_annotation_bottomup_bbstart",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
      {"objc_arc_annotation_bottomup_bbend",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};

  for (auto &I : RuntimeFuncs)
    UpgradeToIntrinsic(I.first, I.second);
}
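// Illustrative sketch (assumed example, not taken from the original source):
// in an old ARC module a plain runtime call such as
//   %1 = call ptr @objc_retain(ptr %obj)
// is rewritten into a call to the corresponding intrinsic,
//   %1 = call ptr @llvm.objc.retain(ptr %obj)
// with bitcasts inserted where the old and new signatures disagree.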
bool llvm::UpgradeModuleFlags(Module &M) {
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  bool HasSwiftVersionFlag = false;
  uint8_t SwiftMajorVersion, SwiftMinorVersion;
  uint32_t SwiftABIVersion;
  auto Int8Ty = Type::getInt8Ty(M.getContext());
  auto Int32Ty = Type::getInt32Ty(M.getContext());

  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(I);
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    auto SetBehavior = [&](Module::ModFlagBehavior B) {
      Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
                              Type::getInt32Ty(M.getContext()), B)),
                          MDString::get(M.getContext(), ID->getString()),
                          Op->getOperand(2)};
      ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
      Changed = true;
    };

    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
    // Upgrade PIC from Error/Max to Min.
    if (ID->getString() == "PIC Level") {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
        uint64_t V = Behavior->getLimitedValue();
        if (V == Module::Error || V == Module::Max)
          SetBehavior(Module::Min);
      }
    }
    // Upgrade "PIE Level" from Error to Max.
    if (ID->getString() == "PIE Level")
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
        if (Behavior->getLimitedValue() == Module::Error)
          SetBehavior(Module::Max);

    // Upgrade branch protection and return address signing module flags. The
    // module flag behavior for these fields was Error and is now Min.
    if (ID->getString() == "branch-target-enforcement" ||
        ID->getString().starts_with("sign-return-address")) {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
        if (Behavior->getLimitedValue() == Module::Error) {
          Type *Int32Ty = Type::getInt32Ty(M.getContext());
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
              Op->getOperand(1), Op->getOperand(2)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }

    // Upgrade Objective-C Image Info Section. Remove the whitespace in the
    // section name so that llvm-lto will not complain about mismatching
    // module flags that are functionally the same.
    if (ID->getString() == "Objective-C Image Info Section") {
      if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
        SmallVector<StringRef, 4> ValueComp;
        Value->getString().split(ValueComp, " ");
        if (ValueComp.size() != 1) {
          std::string NewValue;
          for (auto &S : ValueComp)
            NewValue += S.str();
          Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
                              MDString::get(M.getContext(), NewValue)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }

    // IRUpgrader turns an i32 type "Objective-C Garbage Collection" into an i8
    // value. If the higher bits are set, it adds a new module flag for swift
    // info.
    if (ID->getString() == "Objective-C Garbage Collection") {
      auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
      if (Md) {
        assert(Md->getValue() && "Expected non-empty metadata");
        auto Type = Md->getValue()->getType();
        if (Type != Int8Ty)
          continue;
        unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
        if ((Val & 0xff) != Val) {
          HasSwiftVersionFlag = true;
          SwiftABIVersion = (Val & 0xff00) >> 8;
          SwiftMajorVersion = (Val & 0xff000000) >> 24;
          SwiftMinorVersion = (Val & 0xff0000) >> 16;
        }
        Metadata *Ops[3] = {
            ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Error)),
            Op->getOperand(1),
            ConstantAsMetadata::get(ConstantInt::get(Int8Ty, Val & 0xff))};
        ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
        Changed = true;
      }
    }

    if (ID->getString() == "amdgpu_code_object_version") {
      Metadata *Ops[3] = {
          Op->getOperand(0),
          MDString::get(M.getContext(), "amdhsa_code_object_version"),
          Op->getOperand(2)};
      ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
      Changed = true;
    }
  }

  // "Objective-C Class Properties" is recently added for Objective-C. We
  // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
  // flag of value 0, so we can correctly downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
                    (uint32_t)0);
    Changed = true;
  }

  if (HasSwiftVersionFlag) {
    M.addModuleFlag(Module::Error, "Swift ABI Version",
                    SwiftABIVersion);
    M.addModuleFlag(Module::Error, "Swift Major Version",
                    ConstantInt::get(Int8Ty, SwiftMajorVersion));
    M.addModuleFlag(Module::Error, "Swift Minor Version",
                    ConstantInt::get(Int8Ty, SwiftMinorVersion));
    Changed = true;
  }

  return Changed;
}
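// Illustrative sketch (assumed example, not taken from the original source):
// a "PIC Level" flag recorded with Error or Max behavior, e.g.
//   !{i32 1, !"PIC Level", i32 2}
// is rewritten to use the Min merge behavior so that modules built with
// different settings can still be linked, roughly
//   !{i32 8, !"PIC Level", i32 2}
// assuming the current numeric encoding of Module::Min.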
void llvm::UpgradeSectionAttributes(Module &M) {
  auto TrimSpaces = [](StringRef Section) -> std::string {
    SmallVector<StringRef, 5> Components;
    Section.split(Components, ',');

    SmallString<32> Buffer;
    raw_svector_ostream OS(Buffer);

    for (auto Component : Components)
      OS << ',' << Component.trim();

    return std::string(OS.str().substr(1));
  };

  for (auto &GV : M.globals()) {
    if (!GV.hasSection())
      continue;

    StringRef Section = GV.getSection();

    if (!Section.starts_with("__DATA, __objc_catlist"))
      continue;

    // __DATA, __objc_catlist, regular, no_dead_strip
    // -> __DATA,__objc_catlist,regular,no_dead_strip
    GV.setSection(TrimSpaces(Section));
  }
}
// Prior to LLVM 10.0, the strictfp attribute could be used on individual
// callsites within a function that did not also have the strictfp attribute.
// Since 10.0, if strict FP semantics are needed within a function, the
// function must have the strictfp attribute and all calls within the function
// must also have the strictfp attribute. This latter restriction is
// necessary to prevent unwanted libcall simplification when a function is
// being cloned (such as for inlining).
//
// The "dangling" strictfp attribute usage was only used to prevent constant
// folding and other libcall simplification. The nobuiltin attribute on the
// callsite has the same effect.
struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
  StrictFPUpgradeVisitor() = default;

  void visitCallBase(CallBase &Call) {
    if (!Call.isStrictFP())
      return;
    if (isa<ConstrainedFPIntrinsic>(&Call))
      return;
    // If we get here, the caller doesn't have the strictfp attribute
    // but this callsite does. Replace the strictfp attribute with nobuiltin.
    Call.removeFnAttr(Attribute::StrictFP);
    Call.addFnAttr(Attribute::NoBuiltin);
  }
};

/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
    : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
  AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;

  void visitAtomicRMWInst(AtomicRMWInst &RMW) {
    if (!RMW.isFloatingPointOperation())
      return;

    MDNode *Empty = MDNode::get(RMW.getContext(), {});
    RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
    RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
    RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
  }
};
void llvm::UpgradeFunctionAttributes(Function &F) {
  // If a function definition doesn't have the strictfp attribute,
  // convert any callsite strictfp attributes to nobuiltin.
  if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
    StrictFPUpgradeVisitor SFPV;
    SFPV.visit(F);
  }

  // Remove all incompatible attributes from function.
  F.removeRetAttrs(AttributeFuncs::typeIncompatible(
      F.getReturnType(), F.getAttributes().getRetAttrs()));
  for (auto &Arg : F.args())
    Arg.removeAttrs(
        AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));

  // Older versions of LLVM treated an "implicit-section-name" attribute
  // similarly to directly setting the section on a Function.
  if (Attribute A = F.getFnAttribute("implicit-section-name");
      A.isValid() && A.isStringAttribute()) {
    F.setSection(A.getValueAsString());
    F.removeFnAttr("implicit-section-name");
  }

  if (!F.empty()) {
    // For some reason this is called twice, and the first time is before any
    // instructions are loaded into the body.

    if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
        A.isValid()) {
      if (A.getValueAsBool()) {
        AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
        Visitor.visit(F);
      }

      // We will leave behind dead attribute uses on external declarations, but
      // clang never added these to declarations anyway.
      F.removeFnAttr("amdgpu-unsafe-fp-atomics");
    }
  }
}
static bool isOldLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return false;
  if (T->getNumOperands() < 1)
    return false;
  auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!S)
    return false;
  return S->getString().starts_with("llvm.vectorizer.");
}

static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
  StringRef OldPrefix = "llvm.vectorizer.";
  assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");

  if (OldTag == "llvm.vectorizer.unroll")
    return MDString::get(C, "llvm.loop.interleave.count");

  return MDString::get(
      C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
             .str());
}

static Metadata *upgradeLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return MD;
  if (T->getNumOperands() < 1)
    return MD;
  auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!OldTag)
    return MD;
  if (!OldTag->getString().starts_with("llvm.vectorizer."))
    return MD;

  // This has an old tag. Upgrade it.
  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
  for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
    Ops.push_back(T->getOperand(I));

  return MDTuple::get(T->getContext(), Ops);
}

MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
  auto *T = dyn_cast<MDTuple>(&N);
  if (!T)
    return &N;

  if (none_of(T->operands(), isOldLoopArgument))
    return &N;

  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  for (Metadata *MD : T->operands())
    Ops.push_back(upgradeLoopArgument(MD));

  return MDTuple::get(T->getContext(), Ops);
}
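// Illustrative sketch (assumed example, not taken from the original source):
// a loop attachment written with the old tags, e.g.
//   !0 = !{!0, !1, !2}
//   !1 = !{!"llvm.vectorizer.width", i32 4}
//   !2 = !{!"llvm.vectorizer.unroll", i32 2}
// is rebuilt with the current names "llvm.loop.vectorize.width" and
// "llvm.loop.interleave.count" respectively.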
std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
  Triple T(TT);
  // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
  // the address space of globals to 1. This does not apply to SPIRV Logical.
  if (((T.isAMDGPU() && !T.isAMDGCN()) ||
       (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
      !DL.contains("-G") && !DL.starts_with("G")) {
    return DL.empty() ? std::string("G1") : (DL + "-G1").str();
  }

  if (T.isLoongArch64() || T.isRISCV64()) {
    // Make i32 a native type for 64-bit LoongArch and RISC-V.
    auto I = DL.find("-n64-");
    if (I != StringRef::npos)
      return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
    return DL.str();
  }

  std::string Res = DL.str();
  // AMDGCN data layout upgrades.
  if (T.isAMDGCN()) {
    // Define address spaces for constants.
    if (!DL.contains("-G") && !DL.starts_with("G"))
      Res.append(Res.empty() ? "G1" : "-G1");

    // Add missing non-integral declarations.
    // This goes before adding new address spaces to prevent incoherent string
    // values.
    if (!DL.contains("-ni") && !DL.starts_with("ni"))
      Res.append("-ni:7:8:9");
    // Update ni:7 to ni:7:8:9.
    if (DL.ends_with("ni:7"))
      Res.append(":8:9");
    if (DL.ends_with("ni:7:8"))
      Res.append(":9");

    // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
    // resources). An empty data layout has already been upgraded to G1 by now.
    if (!DL.contains("-p7") && !DL.starts_with("p7"))
      Res.append("-p7:160:256:256:32");
    if (!DL.contains("-p8") && !DL.starts_with("p8"))
      Res.append("-p8:128:128");
    if (!DL.contains("-p9") && !DL.starts_with("p9"))
      Res.append("-p9:192:256:256:32");

    return Res;
  }

  auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
    // If the datalayout matches the expected format, add pointer size address
    // spaces to the datalayout.
    StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
    if (!DL.contains(AddrSpaces)) {
      SmallVector<StringRef, 4> Groups;
      Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
      if (R.match(Res, &Groups))
        Res = (Groups[1] + AddrSpaces + Groups[3]).str();
    }
  };

  // AArch64 data layout upgrades.
  if (T.isAArch64()) {
    if (!DL.empty() && !DL.contains("-Fn32"))
      Res.append("-Fn32");
    AddPtr32Ptr64AddrSpaces();
    return Res;
  }

  if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
      T.isWasm()) {
    // Mips64 with o32 ABI did not add "-i128:128".
    std::string I64 = "-i64:64";
    std::string I128 = "-i128:128";
    if (!StringRef(Res).contains(I128)) {
      size_t Pos = Res.find(I64);
      if (Pos != size_t(-1))
        Res.insert(Pos + I64.size(), I128);
    }
    return Res;
  }

  if (!T.isX86())
    return Res;

  AddPtr32Ptr64AddrSpaces();

  // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
  // for i128 operations prior to this being reflected in the data layout, and
  // clang mostly produced LLVM IR that already aligned i128 to 16 byte
  // boundaries, so although this is a breaking change, the upgrade is expected
  // to fix more IR than it breaks.
  // Intel MCU is an exception and uses 4-byte-alignment.
  if (!T.isOSIAMCU()) {
    std::string I128 = "-i128:128";
    if (StringRef Ref = Res; !Ref.contains(I128)) {
      SmallVector<StringRef, 4> Groups;
      Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
      if (R.match(Res, &Groups))
        Res = (Groups[1] + I128 + Groups[3]).str();
    }
  }

  // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
  // Raising the alignment is safe because Clang did not produce f80 values in
  // the MSVC environment before this upgrade was added.
  if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
    StringRef Ref = Res;
    auto I = Ref.find("-f80:32-");
    if (I != StringRef::npos)
      Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
  }

  return Res;
}
void llvm::UpgradeAttributes(AttrBuilder &B) {
  StringRef FramePointer;
  Attribute A = B.getAttribute("no-frame-pointer-elim");
  if (A.isValid()) {
    // The value can be "true" or "false".
    FramePointer = A.getValueAsString() == "true" ? "all" : "none";
    B.removeAttribute("no-frame-pointer-elim");
  }
  if (B.contains("no-frame-pointer-elim-non-leaf")) {
    // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
    if (FramePointer != "all")
      FramePointer = "non-leaf";
    B.removeAttribute("no-frame-pointer-elim-non-leaf");
  }
  if (!FramePointer.empty())
    B.addAttribute("frame-pointer", FramePointer);

  A = B.getAttribute("null-pointer-is-valid");
  if (A.isValid()) {
    // The value can be "true" or "false".
    bool NullPointerIsValid = A.getValueAsString() == "true";
    B.removeAttribute("null-pointer-is-valid");
    if (NullPointerIsValid)
      B.addAttribute(Attribute::NullPointerIsValid);
  }
}
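// Illustrative sketch (assumed example, not taken from the original source):
// the old string attributes "no-frame-pointer-elim"="true" and
// "no-frame-pointer-elim-non-leaf" map onto "frame-pointer"="all" and
// "frame-pointer"="non-leaf" respectively, and "null-pointer-is-valid"="true"
// becomes the null_pointer_is_valid enum attribute.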
void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
  // clang.arc.attachedcall bundles are now required to have an operand.
  // If they don't, it's okay to drop them entirely: when there is an operand,
  // the "attachedcall" is meaningful and required, but without an operand,
  // it's just a marker NOP. Dropping it merely prevents an optimization.
  erase_if(Bundles, [&](OperandBundleDef &OBD) {
    return OBD.getTag() == "clang.arc.attachedcall" &&
           OBD.inputs().empty();
  });
}