//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"

using namespace llvm;

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

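// rename() moves an obsolete intrinsic declaration aside (e.g.
// "llvm.x86.sse41.ptestc" becomes "llvm.x86.sse41.ptestc.old") so the helpers
// below can re-create the up-to-date declaration under the original name.
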
// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

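// A sketch of the signature change handled here: an old declaration
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// is renamed aside and re-declared as the current
//   declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>)
// with the call sites fixed up later in UpgradeIntrinsicCall.
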
// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

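// E.g. the old declaration
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
// is mapped to the current one, which takes the immediate as an i8.
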
static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsics matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name == "addcarryx.u32" || // Added in 8.0
      Name == "addcarryx.u64" || // Added in 8.0
      Name == "addcarry.u32" || // Added in 8.0
      Name == "addcarry.u64" || // Added in 8.0
      Name == "subborrow.u32" || // Added in 8.0
      Name == "subborrow.u64" || // Added in 8.0
      Name.startswith("sse2.padds.") || // Added in 8.0
      Name.startswith("sse2.psubs.") || // Added in 8.0
      Name.startswith("sse2.paddus.") || // Added in 8.0
      Name.startswith("sse2.psubus.") || // Added in 8.0
      Name.startswith("avx2.padds.") || // Added in 8.0
      Name.startswith("avx2.psubs.") || // Added in 8.0
      Name.startswith("avx2.paddus.") || // Added in 8.0
      Name.startswith("avx2.psubus.") || // Added in 8.0
      Name.startswith("avx512.padds.") || // Added in 8.0
      Name.startswith("avx512.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.padds.") || // Added in 8.0
      Name.startswith("avx512.mask.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.paddus.") || // Added in 8.0
      Name.startswith("avx512.mask.psubus.") || // Added in 8.0
      Name == "ssse3.pabs.b.128" || // Added in 6.0
      Name == "ssse3.pabs.w.128" || // Added in 6.0
      Name == "ssse3.pabs.d.128" || // Added in 6.0
      Name.startswith("fma4.vfmadd.s") || // Added in 7.0
      Name.startswith("fma.vfmadd.") || // Added in 7.0
      Name.startswith("fma.vfmsub.") || // Added in 7.0
      Name.startswith("fma.vfmaddsub.") || // Added in 7.0
      Name.startswith("fma.vfmsubadd.") || // Added in 7.0
      Name.startswith("fma.vfnmadd.") || // Added in 7.0
      Name.startswith("fma.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
      Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
      Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
      Name.startswith("avx512.kunpck") || // Added in 6.0
      Name.startswith("avx2.pabs.") || // Added in 6.0
      Name.startswith("avx512.mask.pabs.") || // Added in 6.0
      Name.startswith("avx512.broadcastm") || // Added in 6.0
      Name == "sse.sqrt.ss" || // Added in 7.0
      Name == "sse2.sqrt.sd" || // Added in 7.0
      Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
      Name.startswith("avx.sqrt.p") || // Added in 7.0
      Name.startswith("sse2.sqrt.p") || // Added in 7.0
      Name.startswith("sse.sqrt.p") || // Added in 7.0
      Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
      Name.startswith("sse2.pcmpeq.") || // Added in 3.1
      Name.startswith("sse2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx2.pcmpeq.") || // Added in 3.1
      Name.startswith("avx2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
      Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
      Name.startswith("avx.vperm2f128.") || // Added in 6.0
      Name == "avx2.vperm2i128" || // Added in 6.0
      Name == "sse.add.ss" || // Added in 4.0
      Name == "sse2.add.sd" || // Added in 4.0
      Name == "sse.sub.ss" || // Added in 4.0
      Name == "sse2.sub.sd" || // Added in 4.0
      Name == "sse.mul.ss" || // Added in 4.0
      Name == "sse2.mul.sd" || // Added in 4.0
      Name == "sse.div.ss" || // Added in 4.0
      Name == "sse2.div.sd" || // Added in 4.0
      Name == "sse41.pmaxsb" || // Added in 3.9
      Name == "sse2.pmaxs.w" || // Added in 3.9
      Name == "sse41.pmaxsd" || // Added in 3.9
      Name == "sse2.pmaxu.b" || // Added in 3.9
      Name == "sse41.pmaxuw" || // Added in 3.9
      Name == "sse41.pmaxud" || // Added in 3.9
      Name == "sse41.pminsb" || // Added in 3.9
      Name == "sse2.pmins.w" || // Added in 3.9
      Name == "sse41.pminsd" || // Added in 3.9
      Name == "sse2.pminu.b" || // Added in 3.9
      Name == "sse41.pminuw" || // Added in 3.9
      Name == "sse41.pminud" || // Added in 3.9
      Name == "avx512.kand.w" || // Added in 7.0
      Name == "avx512.kandn.w" || // Added in 7.0
      Name == "avx512.knot.w" || // Added in 7.0
      Name == "avx512.kor.w" || // Added in 7.0
      Name == "avx512.kxor.w" || // Added in 7.0
      Name == "avx512.kxnor.w" || // Added in 7.0
      Name == "avx512.kortestc.w" || // Added in 7.0
      Name == "avx512.kortestz.w" || // Added in 7.0
      Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
      Name.startswith("avx2.pmax") || // Added in 3.9
      Name.startswith("avx2.pmin") || // Added in 3.9
      Name.startswith("avx512.mask.pmax") || // Added in 4.0
      Name.startswith("avx512.mask.pmin") || // Added in 4.0
      Name.startswith("avx2.vbroadcast") || // Added in 3.8
      Name.startswith("avx2.pbroadcast") || // Added in 3.8
      Name.startswith("avx.vpermil.") || // Added in 3.1
      Name.startswith("sse2.pshuf") || // Added in 3.9
      Name.startswith("avx512.pbroadcast") || // Added in 3.9
      Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
      Name.startswith("avx512.mask.movddup") || // Added in 3.9
      Name.startswith("avx512.mask.movshdup") || // Added in 3.9
      Name.startswith("avx512.mask.movsldup") || // Added in 3.9
      Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
      Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
      Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
      Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
      Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
      Name.startswith("avx512.mask.punpckl") || // Added in 3.9
      Name.startswith("avx512.mask.punpckh") || // Added in 3.9
      Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
      Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
      Name.startswith("avx512.mask.pand.") || // Added in 3.9
      Name.startswith("avx512.mask.pandn.") || // Added in 3.9
      Name.startswith("avx512.mask.por.") || // Added in 3.9
      Name.startswith("avx512.mask.pxor.") || // Added in 3.9
      Name.startswith("avx512.mask.and.") || // Added in 3.9
      Name.startswith("avx512.mask.andn.") || // Added in 3.9
      Name.startswith("avx512.mask.or.") || // Added in 3.9
      Name.startswith("avx512.mask.xor.") || // Added in 3.9
      Name.startswith("avx512.mask.padd.") || // Added in 4.0
      Name.startswith("avx512.mask.psub.") || // Added in 4.0
      Name.startswith("avx512.mask.pmull.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
      Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
      Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
      Name == "avx512.cvtusi2sd" || // Added in 7.0
      Name.startswith("avx512.mask.permvar.") || // Added in 7.0
      Name == "sse2.pmulu.dq" || // Added in 7.0
      Name == "sse41.pmuldq" || // Added in 7.0
      Name == "avx2.pmulu.dq" || // Added in 7.0
      Name == "avx2.pmul.dq" || // Added in 7.0
      Name == "avx512.pmulu.dq.512" || // Added in 7.0
      Name == "avx512.pmul.dq.512" || // Added in 7.0
      Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
      Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
      Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
      Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
      Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
      Name.startswith("avx512.mask.psll.d") || // Added in 4.0
      Name.startswith("avx512.mask.psll.q") || // Added in 4.0
      Name.startswith("avx512.mask.psll.w") || // Added in 4.0
      Name.startswith("avx512.mask.psra.d") || // Added in 4.0
      Name.startswith("avx512.mask.psra.q") || // Added in 4.0
      Name.startswith("avx512.mask.psra.w") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
      Name.startswith("avx512.mask.pslli") || // Added in 4.0
      Name.startswith("avx512.mask.psrai") || // Added in 4.0
      Name.startswith("avx512.mask.psrli") || // Added in 4.0
      Name.startswith("avx512.mask.psllv") || // Added in 4.0
      Name.startswith("avx512.mask.psrav") || // Added in 4.0
      Name.startswith("avx512.mask.psrlv") || // Added in 4.0
      Name.startswith("sse41.pmovsx") || // Added in 3.8
      Name.startswith("sse41.pmovzx") || // Added in 3.9
      Name.startswith("avx2.pmovsx") || // Added in 3.9
      Name.startswith("avx2.pmovzx") || // Added in 3.9
      Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
      Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
      Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
      Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
      Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
      Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.vpshld.") || // Added in 8.0
      Name.startswith("avx512.vpshrd.") || // Added in 8.0
      Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
      Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
      Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
      Name.startswith("avx512.mask.conflict.") || // Added in 9.0
      Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
      Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
      Name == "sse.cvtsi2ss" || // Added in 7.0
      Name == "sse.cvtsi642ss" || // Added in 7.0
      Name == "sse2.cvtsi2sd" || // Added in 7.0
      Name == "sse2.cvtsi642sd" || // Added in 7.0
      Name == "sse2.cvtss2sd" || // Added in 7.0
      Name == "sse2.cvtdq2pd" || // Added in 3.9
      Name == "sse2.cvtdq2ps" || // Added in 7.0
      Name == "sse2.cvtps2pd" || // Added in 3.9
      Name == "avx.cvtdq2.pd.256" || // Added in 3.9
      Name == "avx.cvtdq2.ps.256" || // Added in 7.0
      Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
      Name.startswith("avx.vinsertf128.") || // Added in 3.7
      Name == "avx2.vinserti128" || // Added in 3.7
      Name.startswith("avx512.mask.insert") || // Added in 4.0
      Name.startswith("avx.vextractf128.") || // Added in 3.7
      Name == "avx2.vextracti128" || // Added in 3.7
      Name.startswith("avx512.mask.vextract") || // Added in 4.0
      Name.startswith("sse4a.movnt.") || // Added in 3.9
      Name.startswith("avx.movnt.") || // Added in 3.2
      Name.startswith("avx512.storent.") || // Added in 3.9
      Name == "sse41.movntdqa" || // Added in 5.0
      Name == "avx2.movntdqa" || // Added in 5.0
      Name == "avx512.movntdqa" || // Added in 5.0
      Name == "sse2.storel.dq" || // Added in 3.9
      Name.startswith("sse.storeu.") || // Added in 3.9
      Name.startswith("sse2.storeu.") || // Added in 3.9
      Name.startswith("avx.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.store.p") || // Added in 3.9
      Name.startswith("avx512.mask.store.b.") || // Added in 3.9
      Name.startswith("avx512.mask.store.w.") || // Added in 3.9
      Name.startswith("avx512.mask.store.d.") || // Added in 3.9
      Name.startswith("avx512.mask.store.q.") || // Added in 3.9
      Name == "avx512.mask.store.ss" || // Added in 7.0
      Name.startswith("avx512.mask.loadu.") || // Added in 3.9
      Name.startswith("avx512.mask.load.") || // Added in 3.9
      Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
      Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
      Name.startswith("avx512.mask.expand.b") || // Added in 9.0
      Name.startswith("avx512.mask.expand.w") || // Added in 9.0
      Name.startswith("avx512.mask.expand.d") || // Added in 9.0
      Name.startswith("avx512.mask.expand.q") || // Added in 9.0
      Name.startswith("avx512.mask.expand.p") || // Added in 9.0
      Name.startswith("avx512.mask.compress.b") || // Added in 9.0
      Name.startswith("avx512.mask.compress.w") || // Added in 9.0
      Name.startswith("avx512.mask.compress.d") || // Added in 9.0
      Name.startswith("avx512.mask.compress.q") || // Added in 9.0
      Name.startswith("avx512.mask.compress.p") || // Added in 9.0
      Name == "sse42.crc32.64.8" || // Added in 3.4
      Name.startswith("avx.vbroadcast.s") || // Added in 3.5
      Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
      Name.startswith("avx512.mask.palignr.") || // Added in 3.9
      Name.startswith("avx512.mask.valign.") || // Added in 4.0
      Name.startswith("sse2.psll.dq") || // Added in 3.7
      Name.startswith("sse2.psrl.dq") || // Added in 3.7
      Name.startswith("avx2.psll.dq") || // Added in 3.7
      Name.startswith("avx2.psrl.dq") || // Added in 3.7
      Name.startswith("avx512.psll.dq") || // Added in 3.9
      Name.startswith("avx512.psrl.dq") || // Added in 3.9
      Name == "sse41.pblendw" || // Added in 3.7
      Name.startswith("sse41.blendp") || // Added in 3.7
      Name.startswith("avx.blend.p") || // Added in 3.7
      Name == "avx2.pblendw" || // Added in 3.7
      Name.startswith("avx2.pblendd.") || // Added in 3.7
      Name.startswith("avx.vbroadcastf128") || // Added in 4.0
      Name == "avx2.vbroadcasti128" || // Added in 3.7
      Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
      Name == "xop.vpcmov" || // Added in 3.8
      Name == "xop.vpcmov.256" || // Added in 5.0
      Name.startswith("avx512.mask.move.s") || // Added in 4.0
      Name.startswith("avx512.cvtmask2") || // Added in 5.0
      Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
      Name.startswith("xop.vprot") || // Added in 8.0
      Name.startswith("avx512.prol") || // Added in 8.0
      Name.startswith("avx512.pror") || // Added in 8.0
      Name.startswith("avx512.mask.prorv.") || // Added in 8.0
      Name.startswith("avx512.mask.pror.") || // Added in 8.0
      Name.startswith("avx512.mask.prolv.") || // Added in 8.0
      Name.startswith("avx512.mask.prol.") || // Added in 8.0
      Name.startswith("avx512.ptestm") || // Added in 6.0
      Name.startswith("avx512.ptestnm") || // Added in 6.0
      Name.startswith("avx512.mask.pavg")) // Added in 6.0
    return true;

  return false;
}

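// A match above means the call itself is rewritten in UpgradeIntrinsicCall;
// no replacement declaration exists, so the caller reports the upgrade with
// NewFn left as nullptr.
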
static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.startswith("x86."))
    return false;
  // Remove "x86." prefix.
  Name = Name.substr(4);

  if (ShouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_rdtscp);
    return true;
  }

  // SSE4.1 ptest functions may have an old signature.
  if (Name.startswith("sse41.ptest")) { // Added in 3.2
    if (Name.substr(11) == "c")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name.substr(11) == "z")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name.substr(11) == "nzc")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
  }
  // Several blend and other instructions with masks used the wrong number of
  // bits.
  if (Name == "sse41.insertps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                            NewFn);
  if (Name == "sse41.dppd") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                            NewFn);
  if (Name == "sse41.dpps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                            NewFn);
  if (Name == "sse41.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                            NewFn);
  if (Name == "avx.dp.ps.256") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                            NewFn);
  if (Name == "avx2.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                            NewFn);

  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
    return true;
  }
  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
    return true;
  }
  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
    auto Idx = F->getFunctionType()->getParamType(2);
    if (Idx->isFPOrFPVectorTy()) {
      rename(F);
      unsigned IdxSize = Idx->getPrimitiveSizeInBits();
      unsigned EltSize = Idx->getScalarSizeInBits();
      Intrinsic::ID Permil2ID;
      if (EltSize == 64 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd;
      else if (EltSize == 32 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2ps;
      else if (EltSize == 64 && IdxSize == 256)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
      else
        Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
      NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
      return true;
    }
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

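// UpgradeIntrinsicFunction1 below dispatches on the first character after
// the "llvm." prefix, so each case in its switch only ever sees intrinsic
// names beginning with that letter.
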
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Upgrade intrinsics "clang.arc.use" which doesn't start with "llvm.".
  if (F->getName() == "clang.arc.use") {
    NewFn = nullptr;
    return true;
  }

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."

  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vclz")) {
      Type* args[2] = {
        F->arg_begin()->getType(),
        Type::getInt1Ty(F->getContext())
      };
      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
      // the end of the name. Change name from llvm.arm.neon.vclz.* to
      //  llvm.ctlz.*
      FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                               "llvm.ctlz." + Name.substr(14), F->getParent());
      return true;
    }
    if (Name.startswith("arm.neon.vcnt")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vldRegex.match(Name)) {
      auto fArgs = F->getFunctionType()->params();
      SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
      // Can't use Intrinsic::getDeclaration here as the return types might
      // then only be structurally equal.
      FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                               "llvm." + Name + ".p0i8", F->getParent());
      return true;
    }
    Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vstRegex.match(Name)) {
      static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                                Intrinsic::arm_neon_vst2,
                                                Intrinsic::arm_neon_vst3,
                                                Intrinsic::arm_neon_vst4};

      static const Intrinsic::ID StoreLaneInts[] = {
        Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
        Intrinsic::arm_neon_vst4lane
      };

      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
      if (Name.find("lane") == StringRef::npos)
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreInts[fArgs.size() - 3], Tys);
      else
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreLaneInts[fArgs.size() - 5],
                                          Tys);
      return true;
    }
    if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::thread_pointer);
      return true;
    }
    if (Name.startswith("aarch64.neon.addp")) {
      if (F->arg_size() != 2)
        break; // Invalid IR.
      auto fArgs = F->getFunctionType()->params();
      VectorType *ArgTy = dyn_cast<VectorType>(fArgs[0]);
      if (ArgTy && ArgTy->getElementType()->isFloatingPointTy()) {
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::aarch64_neon_faddp,
                                          fArgs);
        return true;
      }
    }
    break;
  }

  case 'c': {
    if (Name.startswith("ctlz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("cttz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                        F->arg_begin()->getType());
      return true;
    }
    break;
  }

  case 'd': {
    if (Name == "dbg.value" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
      return true;
    }
    break;
  }

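  // Note: the four-operand dbg.value matched above is the legacy form
  //   llvm.dbg.value(metadata, i64 offset, metadata, metadata)
  // whose offset operand was removed; the call sites are rewritten in
  // UpgradeIntrinsicCall.
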
  case 'e': {
    SmallVector<StringRef, 2> Groups;
    Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+");
    if (R.match(Name, &Groups)) {
      Intrinsic::ID ID = Intrinsic::not_intrinsic;
      if (Groups[1] == "fadd")
        ID = Intrinsic::experimental_vector_reduce_v2_fadd;
      if (Groups[1] == "fmul")
        ID = Intrinsic::experimental_vector_reduce_v2_fmul;

      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getFunctionType()->getReturnType(), Args[1]};
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
        return true;
      }
    }
    break;
  }

  case 'i':
  case 'l': {
    bool IsLifetimeStart = Name.startswith("lifetime.start");
    if (IsLifetimeStart || Name.startswith("invariant.start")) {
      Intrinsic::ID ID = IsLifetimeStart ?
        Intrinsic::lifetime_start : Intrinsic::invariant_start;
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[1]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }

    bool IsLifetimeEnd = Name.startswith("lifetime.end");
    if (IsLifetimeEnd || Name.startswith("invariant.end")) {
      Intrinsic::ID ID = IsLifetimeEnd ?
        Intrinsic::lifetime_end : Intrinsic::invariant_end;

      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }
    if (Name.startswith("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
          Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }

    break;
  }

  case 'm': {
    if (Name.startswith("masked.load.")) {
      Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_load,
                                          Tys);
        return true;
      }
    }
    if (Name.startswith("masked.store.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = { Args[0], Args[1] };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_store,
                                          Tys);
        return true;
      }
    }
    // Renaming gather/scatter intrinsics with no address space overloading
    // to the new overload which includes an address space.
    if (Name.startswith("masked.gather.")) {
      Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_gather, Tys);
        return true;
      }
    }
    if (Name.startswith("masked.scatter.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {Args[0], Args[1]};
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_scatter, Tys);
        return true;
      }
    }
    // Update the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to embed the alignment as an attribute of
    // the pointer args.
    if (Name.startswith("memcpy.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, src, and len.
      ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
                                        ParamTypes);
      return true;
    }
    if (Name.startswith("memmove.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, src, and len.
      ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
                                        ParamTypes);
      return true;
    }
    if (Name.startswith("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest and len.
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
                                        ParamTypes);
      return true;
    }
    break;
  }

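  // A sketch of the memcpy/memmove/memset change handled above: the old
  // five-operand form
  //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n, i32 4, i1 false)
  // becomes the four-operand form with the alignment carried as "align"
  // parameter attributes instead; the operand rewriting itself happens in
  // UpgradeIntrinsicCall.
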
  case 'n': {
    if (Name.startswith("nvvm.")) {
      Name = Name.substr(5);

      // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
      Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
                              .Cases("brev32", "brev64", Intrinsic::bitreverse)
                              .Case("clz.i", Intrinsic::ctlz)
                              .Case("popc.i", Intrinsic::ctpop)
                              .Default(Intrinsic::not_intrinsic);
      if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
                                          {F->getReturnType()});
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom,
      // but not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = StringSwitch<bool>(Name)
                        .Cases("abs.i", "abs.ll", true)
                        .Cases("clz.ll", "popc.ll", "h2f", true)
                        .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
                        .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
                        .StartsWith("atomic.load.add.f32.p", true)
                        .StartsWith("atomic.load.add.f64.p", true)
                        .Default(false);
      if (Expand) {
        NewFn = nullptr;
        return true;
      }
    }
    break;
  }

  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (Name.startswith("objectsize.")) {
      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->arg_size() == 2 || F->arg_size() == 3 ||
          F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'x':
    if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }
  // Remangle our intrinsic since we upgrade the mangling.
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != None) {
    NewFn = Result.getValue();
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  NewFn = nullptr;
  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}

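// A rough usage sketch (this mirrors how the bitcode reader drives the
// upgrade; the map name is illustrative):
//   Function *NewFn;
//   if (UpgradeIntrinsicFunction(F, NewFn))
//     UpgradedIntrinsics[F] = NewFn; // calls are rewritten later via
//                                    // UpgradeIntrinsicCall
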
GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
                          GV->getName() == "llvm.global_dtors")) ||
      !GV->hasInitializer())
    return nullptr;
  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
  if (!ATy)
    return nullptr;
  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
  if (!STy || STy->getNumElements() != 2)
    return nullptr;

  LLVMContext &C = GV->getContext();
  IRBuilder<> IRB(C);
  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
                               IRB.getInt8PtrTy());
  Constant *Init = GV->getInitializer();
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
    auto Ctor = cast<Constant>(Init->getOperand(i));
    NewCtors[i] = ConstantStruct::get(
        EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
        Constant::getNullValue(IRB.getInt8PtrTy()));
  }
  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);

  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
                            NewInit, GV->getName());
}

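// I.e. an llvm.global_ctors/dtors entry such as
//   { i32 65535, void ()* @ctor }
// is rewritten to the current three-field form, with a null associated-data
// pointer appended:
//   { i32 65535, void ()* @ctor, i8* null }
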
// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
                                         Value *Op, unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
                     cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
    uint32_t Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask,
                                       makeArrayRef(Indices, NumElts),
                                       "extract");
  }

  return Mask;
}

static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
                            Value *Op0, Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
  return Builder.CreateSelect(Mask, Op0, Op1);
}

static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
                                  Value *Op0, Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  llvm::VectorType *MaskTy =
    llvm::VectorType::get(Builder.getInt1Ty(),
                          Mask->getType()->getIntegerBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);
  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
  return Builder.CreateSelect(Mask, Op0, Op1);
}

// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the
// immediate, so we need to handle both cases. VALIGN also doesn't have
// 128-bit lanes.
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = Op0->getType()->getVectorNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  uint32_t Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that.
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),
                                             "palignr");

  return EmitX86Select(Builder, Mask, Align, Passthru);
}

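// E.g. a 128-bit palignr with immediate 4 selects the contiguous 16-byte
// window starting at byte 4 of the concatenation <Op1:Op0> (Op1 in the low
// half), matching the hardware byte-alignment semantics.
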
static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *Ty = CI.getType();
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  else
    llvm_unreachable("Unexpected intrinsic");

  Value *Args[] = { CI.getArgOperand(0), CI.getArgOperand(1),
                    CI.getArgOperand(2) };

  // If this isn't index form we need to swap operand 0 and 1.
  if (!IndexForm)
    std::swap(Args[0], Args[1]);

  Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                                Args);
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
                             : Builder.CreateBitCast(CI.getArgOperand(1), Ty);
  return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}

static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
                                            bool IsSigned, bool IsAddition) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getOperand(0);
  Value *Op1 = CI.getOperand(1);

  Intrinsic::ID IID =
      IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
               : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});

  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
    Value *VecSrc = CI.getOperand(2);
    Value *Mask = CI.getOperand(3);
    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}

*upgradeX86Rotate(IRBuilder
<> &Builder
, CallInst
&CI
,
1106 bool IsRotateRight
) {
1107 Type
*Ty
= CI
.getType();
1108 Value
*Src
= CI
.getArgOperand(0);
1109 Value
*Amt
= CI
.getArgOperand(1);
1111 // Amount may be scalar immediate, in which case create a splat vector.
1112 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1113 // we only care about the lowest log2 bits anyway.
1114 if (Amt
->getType() != Ty
) {
1115 unsigned NumElts
= Ty
->getVectorNumElements();
1116 Amt
= Builder
.CreateIntCast(Amt
, Ty
->getScalarType(), false);
1117 Amt
= Builder
.CreateVectorSplat(NumElts
, Amt
);
1120 Intrinsic::ID IID
= IsRotateRight
? Intrinsic::fshr
: Intrinsic::fshl
;
1121 Function
*Intrin
= Intrinsic::getDeclaration(CI
.getModule(), IID
, Ty
);
1122 Value
*Res
= Builder
.CreateCall(Intrin
, {Src
, Src
, Amt
});
1124 if (CI
.getNumArgOperands() == 4) { // For masked intrinsics.
1125 Value
*VecSrc
= CI
.getOperand(2);
1126 Value
*Mask
= CI
.getOperand(3);
1127 Res
= EmitX86Select(Builder
, Mask
, Res
, VecSrc
);
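// A rotate is a funnel shift with both inputs equal: rotl(x, n) == fshl(x, x, n)
// and rotr(x, n) == fshr(x, x, n), which is exactly the call emitted above.
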
static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
                              bool IsSigned) {
  Type *Ty = CI.getType();
  Value *LHS = CI.getArgOperand(0);
  Value *RHS = CI.getArgOperand(1);

  CmpInst::Predicate Pred;
  switch (Imm) {
  case 0x0:
    Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
    break;
  case 0x1:
    Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
    break;
  case 0x2:
    Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
    break;
  case 0x3:
    Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
    break;
  case 0x4:
    Pred = ICmpInst::ICMP_EQ;
    break;
  case 0x5:
    Pred = ICmpInst::ICMP_NE;
    break;
  case 0x6:
    return Constant::getNullValue(Ty); // FALSE
  case 0x7:
    return Constant::getAllOnesValue(Ty); // TRUE
  default:
    llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
  }

  Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
  Value *Ext = Builder.CreateSExt(Cmp, Ty);
  return Ext;
}

static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
                                    bool IsShiftRight, bool ZeroMask) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Amt = CI.getArgOperand(2);

  if (IsShiftRight)
    std::swap(Op0, Op1);

  // Amount may be scalar immediate, in which case create a splat vector.
  // Funnel shift amounts are treated as modulo and types are all power-of-2,
  // so we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = Ty->getVectorNumElements();
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  }

  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});

  unsigned NumArgs = CI.getNumArgOperands();
  if (NumArgs >= 4) { // For masked intrinsics.
    Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
                    ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
                                   CI.getArgOperand(0);
    Value *Mask = CI.getOperand(NumArgs - 1);
    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}

static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
                                 Value *Ptr, Value *Data, Value *Mask,
                                 bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Data->getType()));
  unsigned Align =
      Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Data, Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Data->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
}

static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
                                Value *Ptr, Value *Passthru, Value *Mask,
                                bool Aligned) {
  Type *ValTy = Passthru->getType();
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
  unsigned Align =
      Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(ValTy, Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Passthru->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
}

static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
  Value *Op0 = CI.getArgOperand(0);
  llvm::Type *Ty = Op0->getType();
  Value *Zero = llvm::Constant::getNullValue(Ty);
  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
  Value *Neg = Builder.CreateNeg(Op0);
  Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);

  if (CI.getNumArgOperands() == 3)
    Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));

  return Res;
}

static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
                               ICmpInst::Predicate Pred) {
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
  Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);

  if (CI.getNumArgOperands() == 4)
    Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));

  return Res;
}

static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
  Type *Ty = CI.getType();

  // Arguments have a vXi32 type so cast to vXi64.
  Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
  Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);

  if (IsSigned) {
    // Shift left then arithmetic shift right.
    Constant *ShiftAmt = ConstantInt::get(Ty, 32);
    LHS = Builder.CreateShl(LHS, ShiftAmt);
    LHS = Builder.CreateAShr(LHS, ShiftAmt);
    RHS = Builder.CreateShl(RHS, ShiftAmt);
    RHS = Builder.CreateAShr(RHS, ShiftAmt);
  } else {
    // Clear the upper bits.
    Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
    LHS = Builder.CreateAnd(LHS, Mask);
    RHS = Builder.CreateAnd(RHS, Mask);
  }

  Value *Res = Builder.CreateMul(LHS, RHS);

  if (CI.getNumArgOperands() == 4)
    Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));

  return Res;
}

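// The widening trick above: the vXi32 inputs are reinterpreted as vXi64, so
// each 64-bit lane holds its multiplicand in the low 32 bits. Sign-extending
// (shl then ashr by 32) or zero-extending (masking with 0xffffffff) those
// lanes lets the plain 64-bit multiply reproduce pmuldq/pmuludq.
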
// Apply a mask to a vector of i1s and make sure the result is at least
// 8 bits wide.
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask) {
  unsigned NumElts = Vec->getType()->getVectorNumElements();
  if (Mask) {
    const auto *C = dyn_cast<Constant>(Mask);
    if (!C || !C->isAllOnesValue())
      Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  }

  if (NumElts < 8) {
    uint32_t Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(Vec,
                                      Constant::getNullValue(Vec->getType()),
                                      Indices);
  }
  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}

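// The shuffle above pads a <1/2/4 x i1> result out to 8 elements (pulling
// zeros from the null vector) so the final bitcast always produces at least
// an i8, matching the k-register semantics of the original mask intrinsics.
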
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
                                   unsigned CC, bool Signed) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = Op0->getType()->getVectorNumElements();

  Value *Cmp;
  if (CC == 3) {
    Cmp = Constant::getNullValue(
        llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    Cmp = Constant::getAllOnesValue(
        llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ; break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE; break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
  }

  Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);

  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
}

// Replace a masked intrinsic with an older unmasked intrinsic.
static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
                                    Intrinsic::ID IID) {
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
  Value *Rep = Builder.CreateCall(Intrin,
                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
  return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
}

static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
  Value* A = CI.getArgOperand(0);
  Value* B = CI.getArgOperand(1);
  Value* Src = CI.getArgOperand(2);
  Value* Mask = CI.getArgOperand(3);

  Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
  Value* Cmp = Builder.CreateIsNotNull(AndNode);
  Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
  Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
  Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
}

static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
  Value* Op = CI.getArgOperand(0);
  Type* ReturnOp = CI.getType();
  unsigned NumElts = CI.getType()->getVectorNumElements();
  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
}

// Replace intrinsic with unmasked version and a select.
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
                                      CallInst &CI, Value *&Rep) {
  Name = Name.substr(12); // Remove avx512.mask.

  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (Name.startswith("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.startswith("permvar.")) {
    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("conflict.")) {
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pavg.")) {
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else
    return false;

  SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
                               CI.arg_operands().end());
  Args.pop_back();
  Args.pop_back();
  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                           Args);
  unsigned NumArgs = CI.getNumArgOperands();
  Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
                      CI.getArgOperand(NumArgs - 2));
  return true;
}
/// Upgrade comment in call to inline asm that represents an objc retain
/// release marker.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  size_t Pos;
  if (AsmStr->find("mov\tfp") == 0 &&
      AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
      (Pos = AsmStr->find("# marker")) != std::string::npos) {
    AsmStr->replace(Pos, 1, ";");
  }
}
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  Function *F = CI->getCalledFunction();
  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());

  assert(F && "Intrinsic call is not direct?");

  if (!NewFn) {
    // Get the Function's name.
    StringRef Name = F->getName();

    // clang.arc.use is an old name for llvm.arc.clang.arc.use. It was dropped
    // from the upgrader because the optimizer now only recognizes intrinsics
    // for ARC runtime calls.
    if (Name == "clang.arc.use") {
      CI->eraseFromParent();
      return;
    }

    assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
    Name = Name.substr(5);

    bool IsX86 = Name.startswith("x86.");
    if (IsX86)
      Name = Name.substr(4);
    bool IsNVVM = Name.startswith("nvvm.");
    if (IsNVVM)
      Name = Name.substr(5);
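
    // The store-like intrinsics handled first exist only for their side
    // effects: they are rewritten in place and the old call is erased. The
    // value-producing intrinsics further down instead build a replacement
    // value in Rep.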
    if (IsX86 && Name.startswith("sse4a.movnt.")) {
      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Nontemporal (unaligned) store of the 0'th element of the float/double
      // vector.
      Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
      PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
      Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
      Value *Extract =
          Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");

      StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }
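
    // The full-width nontemporal stores below get the same treatment: a plain
    // vector store annotated with !nontemporal metadata, roughly
    //   store <8 x float> %v, <8 x float>* %p, align 32, !nontemporal !0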
    if (IsX86 && (Name.startswith("avx.movnt.") ||
                  Name.startswith("avx512.storent."))) {
      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Arg1->getType()),
                                        "cast");
      VectorType *VTy = cast<VectorType>(Arg1->getType());
      StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
                                                 VTy->getBitWidth() / 8);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }
    if (IsX86 && Name == "sse2.storel.dq") {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
      Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
      Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Elt->getType()),
                                        "cast");
      Builder.CreateAlignedStore(Elt, BC, 1);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    if (IsX86 && (Name.startswith("sse.storeu.") ||
                  Name.startswith("sse2.storeu.") ||
                  Name.startswith("avx.storeu."))) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      Arg0 = Builder.CreateBitCast(Arg0,
                                   PointerType::getUnqual(Arg1->getType()),
                                   "cast");
      Builder.CreateAlignedStore(Arg1, Arg0, 1);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }
    if (IsX86 && Name == "avx512.mask.store.ss") {
      Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
      UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
                         Mask, false);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    if (IsX86 && (Name.startswith("avx512.mask.store"))) {
      // "avx512.mask.storeu." or "avx512.mask.store."
      bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
      UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
                         CI->getArgOperand(2), Aligned);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    Value *Rep;
    // Upgrade packed integer vector compare intrinsics to compare instructions.
    if (IsX86 && (Name.startswith("sse2.pcmp") ||
                  Name.startswith("avx2.pcmp"))) {
      // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
      bool CmpEq = Name[9] == 'e';
      Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
                               CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
      Type *ExtTy = Type::getInt32Ty(C);
      if (CI->getOperand(0)->getType()->isIntegerTy(8))
        ExtTy = Type::getInt64Ty(C);
      unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
                         ExtTy->getPrimitiveSizeInBits();
      Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
      Rep = Builder.CreateVectorSplat(NumElts, Rep);
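
    // The sqrt upgrades below lower to the target-independent llvm.sqrt
    // intrinsic; the scalar forms operate on element 0 only and reinsert the
    // result into the source vector.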
    } else if (IsX86 && (Name == "sse.sqrt.ss" ||
                         Name == "sse2.sqrt.sd")) {
      Value *Vec = CI->getArgOperand(0);
      Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
      Function *Intr = Intrinsic::getDeclaration(F->getParent(),
                                                 Intrinsic::sqrt,
                                                 Elt0->getType());
      Elt0 = Builder.CreateCall(Intr, Elt0);
      Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
    } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
                         Name.startswith("sse2.sqrt.p") ||
                         Name.startswith("sse.sqrt.p"))) {
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                         Intrinsic::sqrt,
                                                         CI->getType()),
                               {CI->getArgOperand(0)});
    } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
      if (CI->getNumArgOperands() == 4 &&
          (!isa<ConstantInt>(CI->getArgOperand(3)) ||
           cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
        Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
                                            : Intrinsic::x86_avx512_sqrt_pd_512;

        Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
        Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                           IID), Args);
      } else {
        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                           Intrinsic::sqrt,
                                                           CI->getType()),
                                 {CI->getArgOperand(0)});
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
                         Name.startswith("avx512.ptestnm"))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      Value *Mask = CI->getArgOperand(2);
      Rep = Builder.CreateAnd(Op0, Op1);
      llvm::Type *Ty = Op0->getType();
      Value *Zero = llvm::Constant::getNullValue(Ty);
      ICmpInst::Predicate Pred =
        Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      Rep = Builder.CreateICmp(Pred, Rep, Zero);
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
    } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))) {
      unsigned NumElts =
          CI->getArgOperand(1)->getType()->getVectorNumElements();
      Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
      unsigned NumElts = CI->getType()->getScalarSizeInBits();
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
      uint32_t Indices[64];
      for (unsigned i = 0; i != NumElts; ++i)
        Indices[i] = i;

      // First extract half of each vector. This gives better codegen than
      // doing it in a single shuffle.
      LHS = Builder.CreateShuffleVector(LHS, LHS,
                                        makeArrayRef(Indices, NumElts / 2));
      RHS = Builder.CreateShuffleVector(RHS, RHS,
                                        makeArrayRef(Indices, NumElts / 2));
      // Concat the vectors.
      // NOTE: Operands have to be swapped to match intrinsic definition.
      Rep = Builder.CreateShuffleVector(RHS, LHS,
                                        makeArrayRef(Indices, NumElts));
      Rep = Builder.CreateBitCast(Rep, CI->getType());
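
    // The legacy avx512.k*.w mask intrinsics below are modeled as bitwise
    // operations on <16 x i1> vectors, with the result bitcast back to the
    // i16 mask type of the original call.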
    } else if (IsX86 && Name == "avx512.kand.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      Rep = Builder.CreateAnd(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kandn.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      LHS = Builder.CreateNot(LHS);
      Rep = Builder.CreateAnd(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kor.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      Rep = Builder.CreateOr(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kxor.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      Rep = Builder.CreateXor(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kxnor.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      LHS = Builder.CreateNot(LHS);
      Rep = Builder.CreateXor(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.knot.w") {
      Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Rep = Builder.CreateNot(Rep);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 &&
               (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      Rep = Builder.CreateOr(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
      Value *C;
      if (Name[14] == 'c')
        C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
      else
        C = ConstantInt::getNullValue(Builder.getInt16Ty());
      Rep = Builder.CreateICmpEQ(Rep, C);
      Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
    } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
                         Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
                         Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
                         Name == "sse.div.ss" || Name == "sse2.div.sd")) {
      Type *I32Ty = Type::getInt32Ty(C);
      Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
                                                 ConstantInt::get(I32Ty, 0));
      Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
                                                 ConstantInt::get(I32Ty, 0));
      Value *EltOp;
      if (Name.contains(".add."))
        EltOp = Builder.CreateFAdd(Elt0, Elt1);
      else if (Name.contains(".sub."))
        EltOp = Builder.CreateFSub(Elt0, Elt1);
      else if (Name.contains(".mul."))
        EltOp = Builder.CreateFMul(Elt0, Elt1);
      else
        EltOp = Builder.CreateFDiv(Elt0, Elt1);
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
                                        ConstantInt::get(I32Ty, 0));
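
    // avx512.mask.pcmpeq/pcmpgt map onto upgradeMaskedCompare with the
    // equivalent integer predicate immediate (0 for eq, 6 for signed gt).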
    } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
      // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
      bool CmpEq = Name[16] == 'e';
      Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
    } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
      Type *OpTy = CI->getArgOperand(0)->getType();
      unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
      Intrinsic::ID IID;
      switch (VecWidth) {
      default: llvm_unreachable("Unexpected intrinsic");
      case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
      case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
      case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
      }

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getOperand(0), CI->getArgOperand(1) });
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
      Type *OpTy = CI->getArgOperand(0)->getType();
      unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
      unsigned EltWidth = OpTy->getScalarSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_fpclass_ps_128;
      else if (VecWidth == 256 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_fpclass_ps_256;
      else if (VecWidth == 512 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_fpclass_ps_512;
      else if (VecWidth == 128 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_fpclass_pd_128;
      else if (VecWidth == 256 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_fpclass_pd_256;
      else if (VecWidth == 512 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_fpclass_pd_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getOperand(0), CI->getArgOperand(1) });
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
      Type *OpTy = CI->getArgOperand(0)->getType();
      unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
      unsigned EltWidth = OpTy->getScalarSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_cmp_ps_128;
      else if (VecWidth == 256 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_cmp_ps_256;
      else if (VecWidth == 512 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_cmp_ps_512;
      else if (VecWidth == 128 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_cmp_pd_128;
      else if (VecWidth == 256 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_cmp_pd_256;
      else if (VecWidth == 512 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_cmp_pd_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      SmallVector<Value *, 4> Args;
      Args.push_back(CI->getArgOperand(0));
      Args.push_back(CI->getArgOperand(1));
      Args.push_back(CI->getArgOperand(2));
      if (CI->getNumArgOperands() == 5)
        Args.push_back(CI->getArgOperand(4));

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               Args);
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
    } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
               Name[16] != 'p') {
      // Integer compare intrinsics.
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
    } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
    } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
                         Name.startswith("avx512.cvtw2mask.") ||
                         Name.startswith("avx512.cvtd2mask.") ||
                         Name.startswith("avx512.cvtq2mask."))) {
      Value *Op = CI->getArgOperand(0);
      Value *Zero = llvm::Constant::getNullValue(Op->getType());
      Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
    } else if (IsX86 && (Name == "ssse3.pabs.b.128" ||
                         Name == "ssse3.pabs.w.128" ||
                         Name == "ssse3.pabs.d.128" ||
                         Name.startswith("avx2.pabs") ||
                         Name.startswith("avx512.mask.pabs"))) {
      Rep = upgradeAbs(Builder, *CI);
    } else if (IsX86 && (Name == "sse41.pmaxsb" ||
                         Name == "sse2.pmaxs.w" ||
                         Name == "sse41.pmaxsd" ||
                         Name.startswith("avx2.pmaxs") ||
                         Name.startswith("avx512.mask.pmaxs"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
    } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
                         Name == "sse41.pmaxuw" ||
                         Name == "sse41.pmaxud" ||
                         Name.startswith("avx2.pmaxu") ||
                         Name.startswith("avx512.mask.pmaxu"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
    } else if (IsX86 && (Name == "sse41.pminsb" ||
                         Name == "sse2.pmins.w" ||
                         Name == "sse41.pminsd" ||
                         Name.startswith("avx2.pmins") ||
                         Name.startswith("avx512.mask.pmins"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
    } else if (IsX86 && (Name == "sse2.pminu.b" ||
                         Name == "sse41.pminuw" ||
                         Name == "sse41.pminud" ||
                         Name.startswith("avx2.pminu") ||
                         Name.startswith("avx512.mask.pminu"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
    } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
                         Name == "avx2.pmulu.dq" ||
                         Name == "avx512.pmulu.dq.512" ||
                         Name.startswith("avx512.mask.pmulu.dq."))) {
      Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
    } else if (IsX86 && (Name == "sse41.pmuldq" ||
                         Name == "avx2.pmul.dq" ||
                         Name == "avx512.pmul.dq.512" ||
                         Name.startswith("avx512.mask.pmul.dq."))) {
      Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
    } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
                         Name == "sse2.cvtsi2sd" ||
                         Name == "sse.cvtsi642ss" ||
                         Name == "sse2.cvtsi642sd")) {
      Rep = Builder.CreateSIToFP(CI->getArgOperand(1),
                                 CI->getType()->getVectorElementType());
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
    } else if (IsX86 && Name == "avx512.cvtusi2sd") {
      Rep = Builder.CreateUIToFP(CI->getArgOperand(1),
                                 CI->getType()->getVectorElementType());
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
    } else if (IsX86 && Name == "sse2.cvtss2sd") {
      Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
      Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType());
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
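
    // The vector conversions below all reduce to plain sitofp/uitofp (or
    // fpext for the *ps2pd forms), with a select for the masked variants and
    // a rounding intrinsic only when a non-default rounding argument is
    // present.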
    } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
                         Name == "sse2.cvtdq2ps" ||
                         Name == "avx.cvtdq2.pd.256" ||
                         Name == "avx.cvtdq2.ps.256" ||
                         Name.startswith("avx512.mask.cvtdq2pd.") ||
                         Name.startswith("avx512.mask.cvtudq2pd.") ||
                         Name.startswith("avx512.mask.cvtdq2ps.") ||
                         Name.startswith("avx512.mask.cvtudq2ps.") ||
                         Name.startswith("avx512.mask.cvtqq2pd.") ||
                         Name.startswith("avx512.mask.cvtuqq2pd.") ||
                         Name == "avx512.mask.cvtqq2ps.256" ||
                         Name == "avx512.mask.cvtqq2ps.512" ||
                         Name == "avx512.mask.cvtuqq2ps.256" ||
                         Name == "avx512.mask.cvtuqq2ps.512" ||
                         Name == "sse2.cvtps2pd" ||
                         Name == "avx.cvt.ps2.pd.256" ||
                         Name == "avx512.mask.cvtps2pd.128" ||
                         Name == "avx512.mask.cvtps2pd.256")) {
      Type *DstTy = CI->getType();
      Rep = CI->getArgOperand(0);
      Type *SrcTy = Rep->getType();

      unsigned NumDstElts = DstTy->getVectorNumElements();
      if (NumDstElts < SrcTy->getVectorNumElements()) {
        assert(NumDstElts == 2 && "Unexpected vector size");
        uint32_t ShuffleMask[2] = { 0, 1 };
        Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
      }

      bool IsPS2PD = SrcTy->getVectorElementType()->isFloatTy();
      bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
      if (IsPS2PD)
        Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
      else if (CI->getNumArgOperands() == 4 &&
               (!isa<ConstantInt>(CI->getArgOperand(3)) ||
                cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
        Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
                                       : Intrinsic::x86_avx512_sitofp_round;
        Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
                                                { DstTy, SrcTy });
        Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
      } else {
        Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
                         : Builder.CreateSIToFP(Rep, DstTy, "cvt");
      }

      if (CI->getNumArgOperands() >= 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
      Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
                              CI->getArgOperand(1), CI->getArgOperand(2),
                              /*Aligned*/false);
    } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
      Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
                              CI->getArgOperand(1), CI->getArgOperand(2),
                              /*Aligned*/true);
    } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
      Type *ResultTy = CI->getType();
      Type *PtrTy = ResultTy->getVectorElementType();

      // Cast the pointer to element type.
      Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
                                         llvm::PointerType::getUnqual(PtrTy));

      Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                     ResultTy->getVectorNumElements());

      Function *ELd = Intrinsic::getDeclaration(F->getParent(),
                                                Intrinsic::masked_expandload,
                                                ResultTy);
      Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
    } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
      Type *ResultTy = CI->getArgOperand(1)->getType();
      Type *PtrTy = ResultTy->getVectorElementType();

      // Cast the pointer to element type.
      Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
                                         llvm::PointerType::getUnqual(PtrTy));

      Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                     ResultTy->getVectorNumElements());

      Function *CSt = Intrinsic::getDeclaration(F->getParent(),
                                                Intrinsic::masked_compressstore,
                                                ResultTy);
      Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
    } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
                         Name.startswith("avx512.mask.expand."))) {
      Type *ResultTy = CI->getType();

      Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                     ResultTy->getVectorNumElements());

      bool IsCompress = Name[12] == 'c';
      Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
                                     : Intrinsic::x86_avx512_mask_expand;
      Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
      Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
                                       MaskVec });
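
    // XOP vpcom comparisons encode the predicate either as an explicit third
    // immediate operand or in the intrinsic name itself; the named forms
    // lt/le/gt/ge/eq/ne/false/true correspond to immediates 0 through 7.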
    } else if (IsX86 && Name.startswith("xop.vpcom")) {
      bool IsSigned;
      if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
          Name.endswith("uq"))
        IsSigned = false;
      else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
               Name.endswith("q"))
        IsSigned = true;
      else
        llvm_unreachable("Unknown suffix");

      unsigned Imm;
      if (CI->getNumArgOperands() == 3) {
        Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      } else {
        Name = Name.substr(9); // strip off "xop.vpcom"
        if (Name.startswith("lt"))
          Imm = 0;
        else if (Name.startswith("le"))
          Imm = 1;
        else if (Name.startswith("gt"))
          Imm = 2;
        else if (Name.startswith("ge"))
          Imm = 3;
        else if (Name.startswith("eq"))
          Imm = 4;
        else if (Name.startswith("ne"))
          Imm = 5;
        else if (Name.startswith("false"))
          Imm = 6;
        else if (Name.startswith("true"))
          Imm = 7;
        else
          llvm_unreachable("Unknown condition");
      }

      Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
    } else if (IsX86 && Name.startswith("xop.vpcmov")) {
      Value *Sel = CI->getArgOperand(2);
      Value *NotSel = Builder.CreateNot(Sel);
      Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
      Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
      Rep = Builder.CreateOr(Sel0, Sel1);
    } else if (IsX86 && (Name.startswith("xop.vprot") ||
                         Name.startswith("avx512.prol") ||
                         Name.startswith("avx512.mask.prol"))) {
      Rep = upgradeX86Rotate(Builder, *CI, false);
    } else if (IsX86 && (Name.startswith("avx512.pror") ||
                         Name.startswith("avx512.mask.pror"))) {
      Rep = upgradeX86Rotate(Builder, *CI, true);
    } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
                         Name.startswith("avx512.mask.vpshld") ||
                         Name.startswith("avx512.maskz.vpshld"))) {
      bool ZeroMask = Name[11] == 'z';
      Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
    } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
                         Name.startswith("avx512.mask.vpshrd") ||
                         Name.startswith("avx512.maskz.vpshrd"))) {
      bool ZeroMask = Name[11] == 'z';
      Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
    } else if (IsX86 && Name == "sse42.crc32.64.8") {
      Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
                                               Intrinsic::x86_sse42_crc32_32_8);
      Value *Trunc0 =
          Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
      Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
      Rep = Builder.CreateZExt(Rep, CI->getType(), "");
    } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
                         Name.startswith("avx512.vbroadcast.s"))) {
      // Replace broadcasts with a series of insertelements.
      Type *VecTy = CI->getType();
      Type *EltTy = VecTy->getVectorElementType();
      unsigned EltNum = VecTy->getVectorNumElements();
      Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
                                          EltTy->getPointerTo());
      Value *Load = Builder.CreateLoad(EltTy, Cast);
      Type *I32Ty = Type::getInt32Ty(C);
      Rep = UndefValue::get(VecTy);
      for (unsigned I = 0; I < EltNum; ++I)
        Rep = Builder.CreateInsertElement(Rep, Load,
                                          ConstantInt::get(I32Ty, I));
    } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
                         Name.startswith("sse41.pmovzx") ||
                         Name.startswith("avx2.pmovsx") ||
                         Name.startswith("avx2.pmovzx") ||
                         Name.startswith("avx512.mask.pmovsx") ||
                         Name.startswith("avx512.mask.pmovzx"))) {
      VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
      VectorType *DstTy = cast<VectorType>(CI->getType());
      unsigned NumDstElts = DstTy->getNumElements();

      // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
      SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
      for (unsigned i = 0; i != NumDstElts; ++i)
        ShuffleMask[i] = i;

      Value *SV = Builder.CreateShuffleVector(
          CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);

      bool DoSext = (StringRef::npos != Name.find("pmovsx"));
      Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
                   : Builder.CreateZExt(SV, DstTy);
      // If there are 3 arguments, it's a masked intrinsic so we need a select.
      if (CI->getNumArgOperands() == 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (Name == "avx512.mask.pmov.qd.256" ||
               Name == "avx512.mask.pmov.qd.512" ||
               Name == "avx512.mask.pmov.wb.256" ||
               Name == "avx512.mask.pmov.wb.512") {
      Type *Ty = CI->getArgOperand(1)->getType();
      Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
                         Name == "avx2.vbroadcasti128")) {
      // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
      Type *EltTy = CI->getType()->getVectorElementType();
      unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
      Type *VT = VectorType::get(EltTy, NumSrcElts);
      Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                            PointerType::getUnqual(VT));
      Value *Load = Builder.CreateAlignedLoad(VT, Op, 1);
      if (NumSrcElts == 2)
        Rep = Builder.CreateShuffleVector(Load,
                                          UndefValue::get(Load->getType()),
                                          { 0, 1, 0, 1 });
      else
        Rep = Builder.CreateShuffleVector(Load,
                                          UndefValue::get(Load->getType()),
                                          { 0, 1, 2, 3, 0, 1, 2, 3 });
    } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
                         Name.startswith("avx512.mask.shuf.f"))) {
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Type *VT = CI->getType();
      unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
      unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
      unsigned ControlBitsMask = NumLanes - 1;
      unsigned NumControlBits = NumLanes / 2;
      SmallVector<uint32_t, 8> ShuffleMask(0);

      for (unsigned l = 0; l != NumLanes; ++l) {
        unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
        // We actually need the other source.
        if (l >= NumLanes / 2)
          LaneMask += NumLanes;
        for (unsigned i = 0; i != NumElementsInLane; ++i)
          ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
      }
      Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                        CI->getArgOperand(1), ShuffleMask);
      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
    } else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
                         Name.startswith("avx512.mask.broadcasti"))) {
      unsigned NumSrcElts =
          CI->getArgOperand(0)->getType()->getVectorNumElements();
      unsigned NumDstElts = CI->getType()->getVectorNumElements();

      SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
      for (unsigned i = 0; i != NumDstElts; ++i)
        ShuffleMask[i] = i % NumSrcElts;

      Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                        CI->getArgOperand(0),
                                        ShuffleMask);
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
                         Name.startswith("avx2.vbroadcast") ||
                         Name.startswith("avx512.pbroadcast") ||
                         Name.startswith("avx512.mask.broadcast.s"))) {
      // Replace vp?broadcasts with a vector shuffle.
      Value *Op = CI->getArgOperand(0);
      unsigned NumElts = CI->getType()->getVectorNumElements();
      Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
      Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
                                        Constant::getNullValue(MaskTy));

      if (CI->getNumArgOperands() == 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("sse2.padds.") ||
                         Name.startswith("sse2.psubs.") ||
                         Name.startswith("avx2.padds.") ||
                         Name.startswith("avx2.psubs.") ||
                         Name.startswith("avx512.padds.") ||
                         Name.startswith("avx512.psubs.") ||
                         Name.startswith("avx512.mask.padds.") ||
                         Name.startswith("avx512.mask.psubs."))) {
      bool IsAdd = Name.contains(".padds");
      Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
    } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
                         Name.startswith("sse2.psubus.") ||
                         Name.startswith("avx2.paddus.") ||
                         Name.startswith("avx2.psubus.") ||
                         Name.startswith("avx512.mask.paddus.") ||
                         Name.startswith("avx512.mask.psubus."))) {
      bool IsAdd = Name.contains(".paddus");
      Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
    } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
                                      CI->getArgOperand(1),
                                      CI->getArgOperand(2),
                                      CI->getArgOperand(3),
                                      CI->getArgOperand(4),
                                      false);
    } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
                                      CI->getArgOperand(1),
                                      CI->getArgOperand(2),
                                      CI->getArgOperand(3),
                                      CI->getArgOperand(4),
                                      true);
    } else if (IsX86 && (Name == "sse2.psll.dq" ||
                         Name == "avx2.psll.dq")) {
      // 128/256-bit shift left specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (IsX86 && (Name == "sse2.psrl.dq" ||
                         Name == "avx2.psrl.dq")) {
      // 128/256-bit shift right specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
                         Name == "avx2.psll.dq.bs" ||
                         Name == "avx512.psll.dq.512")) {
      // 128/256/512-bit shift left specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
    } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
                         Name == "avx2.psrl.dq.bs" ||
                         Name == "avx512.psrl.dq.512")) {
      // 128/256/512-bit shift right specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
    } else if (IsX86 && (Name == "sse41.pblendw" ||
                         Name.startswith("sse41.blendp") ||
                         Name.startswith("avx.blend.p") ||
                         Name == "avx2.pblendw" ||
                         Name.startswith("avx2.pblendd."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
                         Name == "avx2.vinserti128" ||
                         Name.startswith("avx512.mask.insert"))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      unsigned DstNumElts = CI->getType()->getVectorNumElements();
      unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
      unsigned Scale = DstNumElts / SrcNumElts;

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm % Scale;

      // Extend the second operand into a vector the size of the destination.
      Value *UndefV = UndefValue::get(Op1->getType());
      SmallVector<uint32_t, 8> Idxs(DstNumElts);
      for (unsigned i = 0; i != SrcNumElts; ++i)
        Idxs[i] = i;
      for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
        Idxs[i] = SrcNumElts;
      Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);

      // Insert the second operand into the first operand.
      //
      // Note that there is no guarantee that instruction lowering will actually
      // produce a vinsertf128 instruction for the created shuffles. In
      // particular, the 0 immediate case involves no lane changes, so it can
      // be handled as a blend.
      //
      // Example of shuffle mask for 32-bit elements:
      // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
      // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >

      // First fill with identity mask.
      for (unsigned i = 0; i != DstNumElts; ++i)
        Idxs[i] = i;
      // Then replace the elements where we need to insert.
      for (unsigned i = 0; i != SrcNumElts; ++i)
        Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
      Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);

      // If the intrinsic has a mask operand, handle that.
      if (CI->getNumArgOperands() == 5)
        Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                            CI->getArgOperand(3));
    } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
                         Name == "avx2.vextracti128" ||
                         Name.startswith("avx512.mask.vextract"))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned DstNumElts = CI->getType()->getVectorNumElements();
      unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
      unsigned Scale = SrcNumElts / DstNumElts;

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm % Scale;

      // Get indexes for the subvector of the input vector.
      SmallVector<uint32_t, 8> Idxs(DstNumElts);
      for (unsigned i = 0; i != DstNumElts; ++i) {
        Idxs[i] = i + (Imm * DstNumElts);
      }
      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      // If the intrinsic has a mask operand, handle that.
      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (!IsX86 && Name == "stackprotectorcheck") {
      Rep = nullptr;
    } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
                         Name.startswith("avx512.mask.perm.di."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<uint32_t, 8> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
                         Name == "avx2.vperm2i128")) {
      // The immediate permute control byte looks like this:
      // [1:0] - select 128 bits from sources for low half of destination
      // [2]   - ignore
      // [3]   - zero low half of destination
      // [5:4] - select 128 bits from sources for high half of destination
      // [6]   - ignore
      // [7]   - zero high half of destination

      uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();

      unsigned NumElts = CI->getType()->getVectorNumElements();
      unsigned HalfSize = NumElts / 2;
      SmallVector<uint32_t, 8> ShuffleMask(NumElts);

      // Determine which operand(s) are actually in use for this instruction.
      Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
      Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);

      // If needed, replace operands based on zero mask.
      V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
      V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;

      // Permute low half of result.
      unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
      for (unsigned i = 0; i < HalfSize; ++i)
        ShuffleMask[i] = StartIndex + i;

      // Permute high half of result.
      StartIndex = (Imm & 0x10) ? HalfSize : 0;
      for (unsigned i = 0; i < HalfSize; ++i)
        ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;

      Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
    } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
                         Name == "sse2.pshuf.d" ||
                         Name.startswith("avx512.mask.vpermil.p") ||
                         Name.startswith("avx512.mask.pshuf.d."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();
      // Calculate the size of each index in the immediate.
      unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
      unsigned IdxMask = ((1 << IdxSize) - 1);

      SmallVector<uint32_t, 8> Idxs(NumElts);
      // Lookup the bits for this element, wrapping around the immediate every
      // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
      // to offset by the first index of each group.
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && (Name == "sse2.pshufl.w" ||
                         Name.startswith("avx512.mask.pshufl.w."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
        for (unsigned i = 4; i != 8; ++i)
          Idxs[i + l] = i + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && (Name == "sse2.pshufh.w" ||
                         Name.startswith("avx512.mask.pshufh.w."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = i + l;
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
      unsigned HalfLaneElts = NumLaneElts / 2;

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        // Base index is the starting element of the lane.
        Idxs[i] = i - (i % NumLaneElts);
        // If we are half way through the lane switch to the other source.
        if ((i % NumLaneElts) >= HalfLaneElts)
          Idxs[i] += NumElts;
        // Now select the specific element by adding HalfLaneElts bits from
        // the immediate, wrapping around the immediate every 8 bits.
        Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
      }

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
    } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
                         Name.startswith("avx512.mask.movshdup") ||
                         Name.startswith("avx512.mask.movsldup"))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned NumElts = CI->getType()->getVectorNumElements();
      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      unsigned Offset = 0;
      if (Name.startswith("avx512.mask.movshdup."))
        Offset = 1;

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += NumLaneElts)
        for (unsigned i = 0; i != NumLaneElts; i += 2) {
          Idxs[i + l + 0] = i + l + Offset;
          Idxs[i + l + 1] = i + l + Offset;
        }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
                         Name.startswith("avx512.mask.unpckl."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = CI->getType()->getVectorNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<uint32_t, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
                         Name.startswith("avx512.mask.unpckh."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = CI->getType()->getVectorNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<uint32_t, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
                         Name.startswith("avx512.mask.pand."))) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
                         Name.startswith("avx512.mask.pandn."))) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
      Rep = Builder.CreateAnd(Rep,
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
                         Name.startswith("avx512.mask.por."))) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
                         Name.startswith("avx512.mask.pxor."))) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
      Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
      Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
      Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
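
    // For the masked fp arithmetic below, the .512 variants keep their
    // rounding-mode operand and so call the avx512 rounding intrinsics; the
    // narrower forms become plain IR fp operations, each followed by a select
    // on the mask.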
    } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_add_ps_512;
        else
          IID = Intrinsic::x86_avx512_add_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_div_ps_512;
        else
          IID = Intrinsic::x86_avx512_div_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_mul_ps_512;
        else
          IID = Intrinsic::x86_avx512_mul_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_sub_ps_512;
        else
          IID = Intrinsic::x86_avx512_sub_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
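      // Editorial note: only the 512-bit forms carry an explicit rounding-mode
      // operand (argument 4), so they must remain intrinsic calls; the 128-bit
      // and 256-bit forms have no rounding control and can be expressed with
      // plain IR fadd/fdiv/fmul/fsub followed by the masking select.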
    } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
                         Name.startswith("avx512.mask.min.p")) &&
               Name.drop_front(18) == ".512") {
      bool IsDouble = Name[17] == 'd';
      bool IsMin = Name[13] == 'i';
      static const Intrinsic::ID MinMaxTbl[2][2] = {
        { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
        { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
      };
      Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1),
                                 CI->getArgOperand(4) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                         Intrinsic::ctlz,
                                                         CI->getType()),
                               { CI->getArgOperand(0), Builder.getInt1(false) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
          IID = Intrinsic::x86_avx2_psllv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
          IID = Intrinsic::x86_avx2_psllv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
          IID = Intrinsic::x86_avx2_psllv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
          IID = Intrinsic::x86_avx2_psllv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
          IID = Intrinsic::x86_avx512_psllv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
          IID = Intrinsic::x86_avx512_psllv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
          IID = Intrinsic::x86_avx512_psllv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
                            : Intrinsic::x86_sse2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
                            : Intrinsic::x86_sse2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
                            : Intrinsic::x86_sse2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
                            : Intrinsic::x86_avx2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
                            : Intrinsic::x86_avx2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
                            : Intrinsic::x86_avx2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
                              Intrinsic::x86_avx512_psll_d_512;
        else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
                              Intrinsic::x86_avx512_psll_q_512;
        else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
                            : Intrinsic::x86_avx512_psll_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
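      // Editorial note on the string slicing above: Name still begins with
      // "avx512.mask.", so index 16 is the character just past "psll".
      // E.g. "avx512.mask.psll.d.128" has Name[16] == '.' and Size == 'd',
      // "avx512.mask.pslli.q" has Name[16] == 'i' (immediate form), and
      // "avx512.mask.psllv8.si" has Name[16] == 'v' (variable form).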
    } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
          IID = Intrinsic::x86_avx2_psrlv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
          IID = Intrinsic::x86_avx2_psrlv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
          IID = Intrinsic::x86_avx2_psrlv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
          IID = Intrinsic::x86_avx2_psrlv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
          IID = Intrinsic::x86_avx512_psrlv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
          IID = Intrinsic::x86_avx512_psrlv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
          IID = Intrinsic::x86_avx512_psrlv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
                            : Intrinsic::x86_sse2_psrl_d;
        else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
                            : Intrinsic::x86_sse2_psrl_q;
        else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
                            : Intrinsic::x86_sse2_psrl_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
                            : Intrinsic::x86_avx2_psrl_d;
        else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
                            : Intrinsic::x86_avx2_psrl_q;
        else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
                            : Intrinsic::x86_avx2_psrl_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
                              Intrinsic::x86_avx512_psrl_d_512;
        else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
                              Intrinsic::x86_avx512_psrl_q_512;
        else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
                            : Intrinsic::x86_avx512_psrl_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
          IID = Intrinsic::x86_avx2_psrav_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
          IID = Intrinsic::x86_avx2_psrav_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
          IID = Intrinsic::x86_avx512_psrav_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
          IID = Intrinsic::x86_avx512_psrav_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
          IID = Intrinsic::x86_avx512_psrav_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
                            : Intrinsic::x86_sse2_psra_d;
        else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
                              Intrinsic::x86_avx512_psra_q_128;
        else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
                            : Intrinsic::x86_sse2_psra_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
                            : Intrinsic::x86_avx2_psra_d;
        else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
                              Intrinsic::x86_avx512_psra_q_256;
        else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
                            : Intrinsic::x86_avx2_psra_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
                              Intrinsic::x86_avx512_psra_d_512;
        else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
                              Intrinsic::x86_avx512_psra_q_512;
        else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
                            : Intrinsic::x86_avx512_psra_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
      Rep = upgradeMaskedMove(Builder, *CI);
    } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
      Rep = UpgradeMaskToInt(Builder, *CI);
    } else if (IsX86 && Name.endswith(".movntdqa")) {
      Module *M = F->getParent();
      MDNode *Node = MDNode::get(
          C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));

      Value *Ptr = CI->getArgOperand(0);
      VectorType *VTy = cast<VectorType>(CI->getType());

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC =
          Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
      LoadInst *LI = Builder.CreateAlignedLoad(VTy, BC, VTy->getBitWidth() / 8);
      LI->setMetadata(M->getMDKindID("nontemporal"), Node);
      Rep = LI;
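      // Editorial sketch of the result: the old intrinsic becomes an ordinary
      // aligned vector load tagged with nontemporal metadata, e.g. for a
      // 256-bit load
      //   %r = load <4 x i64>, <4 x i64>* %cast, align 32, !nontemporal !n
      // where !n holds the i32 constant 1 built above.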
    } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
                         Name.startswith("fma.vfmsub.") ||
                         Name.startswith("fma.vfnmadd.") ||
                         Name.startswith("fma.vfnmsub."))) {
      bool NegMul = Name[6] == 'n';
      bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
      bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';

      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                       CI->getArgOperand(2) };

      if (IsScalar) {
        Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
        Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
        Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
      }

      if (NegMul && !IsScalar)
        Ops[0] = Builder.CreateFNeg(Ops[0]);
      if (NegMul && IsScalar)
        Ops[1] = Builder.CreateFNeg(Ops[1]);
      if (NegAcc)
        Ops[2] = Builder.CreateFNeg(Ops[2]);

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                         Intrinsic::fma,
                                                         Ops[0]->getType()),
                               Ops);

      if (IsScalar)
        Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
                                          (uint64_t)0);
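      // Editorial sketch: the scalar ("ss"/"sd") forms operate on lane 0 only,
      // e.g. fma.vfmadd.ss on <4 x float> becomes roughly
      //   %a0 = extractelement <4 x float> %a, i64 0   ; likewise %b0, %c0
      //   %f  = call float @llvm.fma.f32(float %a0, float %b0, float %c0)
      //   %r  = insertelement <4 x float> %a, float %f, i64 0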
    } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                       CI->getArgOperand(2) };

      Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
      Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
      Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                         Intrinsic::fma,
                                                         Ops[0]->getType()),
                               Ops);

      Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
                                        Rep, (uint64_t)0);
    } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
                         Name.startswith("avx512.maskz.vfmadd.s") ||
                         Name.startswith("avx512.mask3.vfmadd.s") ||
                         Name.startswith("avx512.mask3.vfmsub.s") ||
                         Name.startswith("avx512.mask3.vfnmsub.s"))) {
      bool IsMask3 = Name[11] == '3';
      bool IsMaskZ = Name[11] == 'z';
      // Drop the "avx512.mask." to make it easier.
      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
      bool NegMul = Name[2] == 'n';
      bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

      Value *A = CI->getArgOperand(0);
      Value *B = CI->getArgOperand(1);
      Value *C = CI->getArgOperand(2);

      if (NegMul && (IsMask3 || IsMaskZ))
        A = Builder.CreateFNeg(A);
      if (NegMul && !(IsMask3 || IsMaskZ))
        B = Builder.CreateFNeg(B);
      if (NegAcc)
        C = Builder.CreateFNeg(C);

      A = Builder.CreateExtractElement(A, (uint64_t)0);
      B = Builder.CreateExtractElement(B, (uint64_t)0);
      C = Builder.CreateExtractElement(C, (uint64_t)0);

      if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
          cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
        Value *Ops[] = { A, B, C, CI->getArgOperand(4) };

        Intrinsic::ID IID;
        if (Name.back() == 'd')
          IID = Intrinsic::x86_avx512_vfmadd_f64;
        else
          IID = Intrinsic::x86_avx512_vfmadd_f32;
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
        Rep = Builder.CreateCall(FMA, Ops);
      } else {
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
                                                  Intrinsic::fma,
                                                  A->getType());
        Rep = Builder.CreateCall(FMA, { A, B, C });
      }

      Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
                        IsMask3 ? C : A;

      // For Mask3 with NegAcc, we need to create a new extractelement that
      // avoids the negation above.
      if (NegAcc && IsMask3)
        PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
                                                (uint64_t)0);

      Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
                                Rep, PassThru);
      Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
                                        Rep, (uint64_t)0);
    } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
                         Name.startswith("avx512.mask.vfnmadd.p") ||
                         Name.startswith("avx512.mask.vfnmsub.p") ||
                         Name.startswith("avx512.mask3.vfmadd.p") ||
                         Name.startswith("avx512.mask3.vfmsub.p") ||
                         Name.startswith("avx512.mask3.vfnmsub.p") ||
                         Name.startswith("avx512.maskz.vfmadd.p"))) {
      bool IsMask3 = Name[11] == '3';
      bool IsMaskZ = Name[11] == 'z';
      // Drop the "avx512.mask." to make it easier.
      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
      bool NegMul = Name[2] == 'n';
      bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

      Value *A = CI->getArgOperand(0);
      Value *B = CI->getArgOperand(1);
      Value *C = CI->getArgOperand(2);

      if (NegMul && (IsMask3 || IsMaskZ))
        A = Builder.CreateFNeg(A);
      if (NegMul && !(IsMask3 || IsMaskZ))
        B = Builder.CreateFNeg(B);
      if (NegAcc)
        C = Builder.CreateFNeg(C);

      if (CI->getNumArgOperands() == 5 &&
          (!isa<ConstantInt>(CI->getArgOperand(4)) ||
           cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
        Intrinsic::ID IID;
        // Check the character before ".512" in string.
        if (Name[Name.size()-5] == 's')
          IID = Intrinsic::x86_avx512_vfmadd_ps_512;
        else
          IID = Intrinsic::x86_avx512_vfmadd_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { A, B, C, CI->getArgOperand(4) });
      } else {
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
                                                  Intrinsic::fma,
                                                  A->getType());
        Rep = Builder.CreateCall(FMA, { A, B, C });
      }

      Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
                        IsMask3 ? CI->getArgOperand(2) :
                                  CI->getArgOperand(0);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
                         Name.startswith("fma.vfmsubadd.p"))) {
      bool IsSubAdd = Name[7] == 's';
      int NumElts = CI->getType()->getVectorNumElements();

      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                       CI->getArgOperand(2) };

      Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
                                                Ops[0]->getType());
      Value *Odd = Builder.CreateCall(FMA, Ops);
      Ops[2] = Builder.CreateFNeg(Ops[2]);
      Value *Even = Builder.CreateCall(FMA, Ops);

      if (IsSubAdd)
        std::swap(Even, Odd);

      SmallVector<uint32_t, 32> Idxs(NumElts);
      for (int i = 0; i != NumElts; ++i)
        Idxs[i] = i + (i % 2) * NumElts;

      Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
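      // Editorial note: the shuffle interleaves the two fma results; even
      // lanes take Even (the negated-addend result) and odd lanes take Odd.
      // With NumElts == 4 the index vector is <0, 5, 2, 7>, i.e. lanes 0 and 2
      // come from Even while lanes 1 and 3 come from Odd.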
    } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
                         Name.startswith("avx512.mask3.vfmaddsub.p") ||
                         Name.startswith("avx512.maskz.vfmaddsub.p") ||
                         Name.startswith("avx512.mask3.vfmsubadd.p"))) {
      bool IsMask3 = Name[11] == '3';
      bool IsMaskZ = Name[11] == 'z';
      // Drop the "avx512.mask." to make it easier.
      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
      bool IsSubAdd = Name[3] == 's';
      if (CI->getNumArgOperands() == 5 &&
          (!isa<ConstantInt>(CI->getArgOperand(4)) ||
           cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
        Intrinsic::ID IID;
        // Check the character before ".512" in string.
        if (Name[Name.size()-5] == 's')
          IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
        else
          IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;

        Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                         CI->getArgOperand(2), CI->getArgOperand(4) };
        if (IsSubAdd)
          Ops[2] = Builder.CreateFNeg(Ops[2]);

        // Pass the (possibly negated) Ops so the vfmsubadd negation actually
        // takes effect; the garbled original rebuilt the argument list from
        // the call and left Ops unused.
        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 Ops);
      } else {
        int NumElts = CI->getType()->getVectorNumElements();

        Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                         CI->getArgOperand(2) };

        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
                                                  Intrinsic::fma,
                                                  Ops[0]->getType());
        Value *Odd = Builder.CreateCall(FMA, Ops);
        Ops[2] = Builder.CreateFNeg(Ops[2]);
        Value *Even = Builder.CreateCall(FMA, Ops);

        if (IsSubAdd)
          std::swap(Even, Odd);

        SmallVector<uint32_t, 32> Idxs(NumElts);
        for (int i = 0; i != NumElts; ++i)
          Idxs[i] = i + (i % 2) * NumElts;

        Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
      }

      Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
                        IsMask3 ? CI->getArgOperand(2) :
                                  CI->getArgOperand(0);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
                         Name.startswith("avx512.maskz.pternlog."))) {
      bool ZeroMask = Name[11] == 'z';
      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
      unsigned EltWidth = CI->getType()->getScalarSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_pternlog_d_128;
      else if (VecWidth == 256 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_pternlog_d_256;
      else if (VecWidth == 512 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_pternlog_d_512;
      else if (VecWidth == 128 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_pternlog_q_128;
      else if (VecWidth == 256 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_pternlog_q_256;
      else if (VecWidth == 512 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_pternlog_q_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                        CI->getArgOperand(2), CI->getArgOperand(3) };
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                               Args);
      Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                                 : CI->getArgOperand(0);
      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
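      // Editorial note: argument 3 is vpternlog's 8-bit truth-table immediate;
      // the unmasked intrinsic is emitted first and the mask in argument 4 is
      // then applied as an ordinary per-element select.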
    } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
                         Name.startswith("avx512.maskz.vpmadd52"))) {
      bool ZeroMask = Name[11] == 'z';
      bool High = Name[20] == 'h' || Name[21] == 'h';
      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && !High)
        IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
      else if (VecWidth == 256 && !High)
        IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
      else if (VecWidth == 512 && !High)
        IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
      else if (VecWidth == 128 && High)
        IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
      else if (VecWidth == 256 && High)
        IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
      else if (VecWidth == 512 && High)
        IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                        CI->getArgOperand(2) };
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                               Args);
      Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                                 : CI->getArgOperand(0);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
                         Name.startswith("avx512.mask.vpermt2var.") ||
                         Name.startswith("avx512.maskz.vpermt2var."))) {
      bool ZeroMask = Name[11] == 'z';
      bool IndexForm = Name[17] == 'i';
      Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
    } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
                         Name.startswith("avx512.maskz.vpdpbusd.") ||
                         Name.startswith("avx512.mask.vpdpbusds.") ||
                         Name.startswith("avx512.maskz.vpdpbusds."))) {
      bool ZeroMask = Name[11] == 'z';
      bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && !IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpbusd_128;
      else if (VecWidth == 256 && !IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpbusd_256;
      else if (VecWidth == 512 && !IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpbusd_512;
      else if (VecWidth == 128 && IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpbusds_128;
      else if (VecWidth == 256 && IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpbusds_256;
      else if (VecWidth == 512 && IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpbusds_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                        CI->getArgOperand(2) };
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                               Args);
      Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                                 : CI->getArgOperand(0);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
                         Name.startswith("avx512.maskz.vpdpwssd.") ||
                         Name.startswith("avx512.mask.vpdpwssds.") ||
                         Name.startswith("avx512.maskz.vpdpwssds."))) {
      bool ZeroMask = Name[11] == 'z';
      bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && !IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpwssd_128;
      else if (VecWidth == 256 && !IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpwssd_256;
      else if (VecWidth == 512 && !IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpwssd_512;
      else if (VecWidth == 128 && IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpwssds_128;
      else if (VecWidth == 256 && IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpwssds_256;
      else if (VecWidth == 512 && IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpwssds_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                        CI->getArgOperand(2) };
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                               Args);
      Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                                 : CI->getArgOperand(0);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
                         Name == "addcarry.u32" || Name == "addcarry.u64" ||
                         Name == "subborrow.u32" || Name == "subborrow.u64")) {
      Intrinsic::ID IID;
      if (Name[0] == 'a' && Name.back() == '2')
        IID = Intrinsic::x86_addcarry_32;
      else if (Name[0] == 'a' && Name.back() == '4')
        IID = Intrinsic::x86_addcarry_64;
      else if (Name[0] == 's' && Name.back() == '2')
        IID = Intrinsic::x86_subborrow_32;
      else if (Name[0] == 's' && Name.back() == '4')
        IID = Intrinsic::x86_subborrow_64;
      else
        llvm_unreachable("Unexpected intrinsic");

      // Make a call with 3 operands.
      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                        CI->getArgOperand(2)};
      Value *NewCall = Builder.CreateCall(
          Intrinsic::getDeclaration(CI->getModule(), IID), Args);

      // Extract the second result and store it.
      Value *Data = Builder.CreateExtractValue(NewCall, 1);
      // Cast the pointer to the right type.
      Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
                                 llvm::PointerType::getUnqual(Data->getType()));
      Builder.CreateAlignedStore(Data, Ptr, 1);
      // Replace the original call result with the first result of the new call.
      Value *CF = Builder.CreateExtractValue(NewCall, 0);

      CI->replaceAllUsesWith(CF);
      Rep = nullptr;
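      // Editorial sketch (types assume the 32-bit variant): the new intrinsic
      // returns a pair instead of storing through a pointer, roughly
      //   %pair = call { i8, i32 } @llvm.x86.addcarry.32(i8 %c, i32 %x, i32 %y)
      //   %sum  = extractvalue { i8, i32 } %pair, 1   ; stored through arg 3
      //   %cf   = extractvalue { i8, i32 } %pair, 0   ; replaces the old result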
    } else if (IsX86 && Name.startswith("avx512.mask.") &&
               upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
      // Rep will be updated by the call in the condition.
    } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
      Value *Arg = CI->getArgOperand(0);
      Value *Neg = Builder.CreateNeg(Arg, "neg");
      Value *Cmp = Builder.CreateICmpSGE(
          Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
      Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
    } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
                          Name.startswith("atomic.load.add.f64.p"))) {
      Value *Ptr = CI->getArgOperand(0);
      Value *Val = CI->getArgOperand(1);
      Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val,
                                    AtomicOrdering::SequentiallyConsistent);
    } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
                          Name == "max.ui" || Name == "max.ull")) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
                       ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
                       : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
    } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
                          Name == "min.ui" || Name == "min.ull")) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
                       ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
                       : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
    } else if (IsNVVM && Name == "clz.ll") {
      // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
      Value *Arg = CI->getArgOperand(0);
      Value *Ctlz = Builder.CreateCall(
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                    {Arg->getType()}),
          {Arg, Builder.getFalse()}, "ctlz");
      Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
    } else if (IsNVVM && Name == "popc.ll") {
      // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
      Value *Arg = CI->getArgOperand(0);
      Value *Popc = Builder.CreateCall(
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                    {Arg->getType()}),
          Arg, "ctpop");
      Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
    } else if (IsNVVM && Name == "h2f") {
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(
                                   F->getParent(), Intrinsic::convert_from_fp16,
                                   {Builder.getFloatTy()}),
                               CI->getArgOperand(0), "h2f");
3476 llvm_unreachable("Unknown function for CallInst upgrade.");
3480 CI
->replaceAllUsesWith(Rep
);
3481 CI
->eraseFromParent();
3485 const auto &DefaultCase
= [&NewFn
, &CI
]() -> void {
3486 // Handle generic mangling change, but nothing else
3488 (CI
->getCalledFunction()->getName() != NewFn
->getName()) &&
3489 "Unknown function for CallInst upgrade and isn't just a name change");
3490 CI
->setCalledFunction(NewFn
);
3492 CallInst
*NewCall
= nullptr;
3493 switch (NewFn
->getIntrinsicID()) {
  case Intrinsic::experimental_vector_reduce_v2_fmul: {
    SmallVector<Value *, 2> Args;
    if (CI->isFast())
      Args.push_back(ConstantFP::get(CI->getOperand(0)->getType(), 1.0));
    else
      Args.push_back(CI->getOperand(0));
    Args.push_back(CI->getOperand(1));
    NewCall = Builder.CreateCall(NewFn, Args);
    cast<Instruction>(NewCall)->copyFastMathFlags(CI);
    break;
  }
  case Intrinsic::experimental_vector_reduce_v2_fadd: {
    SmallVector<Value *, 2> Args;
    if (CI->isFast())
      Args.push_back(Constant::getNullValue(CI->getOperand(0)->getType()));
    else
      Args.push_back(CI->getOperand(0));
    Args.push_back(CI->getOperand(1));
    NewCall = Builder.CreateCall(NewFn, Args);
    cast<Instruction>(NewCall)->copyFastMathFlags(CI);
    break;
  }
  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::bitreverse:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    assert(CI->getNumArgOperands() == 1 &&
           "Mismatch between function args and call args");
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
    break;
  case Intrinsic::objectsize: {
    Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
                                   ? Builder.getFalse()
                                   : CI->getArgOperand(2);
    Value *Dynamic =
        CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
    NewCall = Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize,
                Dynamic});
    break;
  }

  case Intrinsic::ctpop:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::convert_from_fp16:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;
  case Intrinsic::dbg_value:
    // Upgrade from the old version that had an extra offset argument.
    assert(CI->getNumArgOperands() == 4);
    // Drop nonzero offsets instead of attempting to upgrade them.
    if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
      if (Offset->isZeroValue()) {
        NewCall = Builder.CreateCall(
            NewFn,
            {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
        break;
      }
    CI->eraseFromParent();
    return;
  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
    break;

  case Intrinsic::x86_xop_vpermil2pd:
  case Intrinsic::x86_xop_vpermil2ps:
  case Intrinsic::x86_xop_vpermil2pd_256:
  case Intrinsic::x86_xop_vpermil2ps_256: {
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
    VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
      return;

    // Old intrinsic, add bitcasts.
    Value *Arg1 = CI->getArgOperand(1);

    Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);

    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

    NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
    break;
  }
  case Intrinsic::x86_rdtscp: {
    // This used to take one argument. If we have no arguments, it is already
    // upgraded.
    if (CI->getNumOperands() == 0)
      return;

    NewCall = Builder.CreateCall(NewFn);
    // Extract the second result and store it.
    Value *Data = Builder.CreateExtractValue(NewCall, 1);
    // Cast the pointer to the right type.
    Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
                                 llvm::PointerType::getUnqual(Data->getType()));
    Builder.CreateAlignedStore(Data, Ptr, 1);
    // Replace the original call result with the first result of the new call.
    Value *TSC = Builder.CreateExtractValue(NewCall, 0);

    std::string Name = CI->getName();
    if (!Name.empty()) {
      CI->setName(Name + ".old");
      NewCall->setName(Name);
    }
    CI->replaceAllUsesWith(TSC);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument models
    // an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());

    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::thread_pointer: {
    NewCall = Builder.CreateCall(NewFn, {});
    break;
  }
  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::masked_load:
  case Intrinsic::masked_store:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_scatter: {
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset: {
    // We have to make sure that the call signature is what we're expecting.
    // We only want to change the old signatures by removing the alignment arg:
    //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|64], i32, i1)
    //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|64], i1)
    //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
    //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
    // Note: i8*'s in the above can be any pointer type.
    if (CI->getNumArgOperands() != 5) {
      DefaultCase();
      return;
    }
    // Remove alignment argument (3), and add alignment attributes to the
    // dest/src pointers.
    Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
                      CI->getArgOperand(2), CI->getArgOperand(4)};
    NewCall = Builder.CreateCall(NewFn, Args);
    auto *MemCI = cast<MemIntrinsic>(NewCall);
    // All mem intrinsics support dest alignment.
    const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
    MemCI->setDestAlignment(Align->getZExtValue());
    // Memcpy/Memmove also support source alignment.
    if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
      MTI->setSourceAlignment(Align->getZExtValue());
    break;
  }
  }
  assert(NewCall && "Should have either set this variable or returned through "
                    "the default case");
  std::string Name = CI->getName();
  if (!Name.empty()) {
    CI->setName(Name + ".old");
    NewCall->setName(Name);
  }
  CI->replaceAllUsesWith(NewCall);
  CI->eraseFromParent();
}
void llvm::UpgradeCallsToIntrinsic(Function *F) {
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");

  // Check if this function should be upgraded and get the replacement function
  // if there is one.
  Function *NewFn;
  if (UpgradeIntrinsicFunction(F, NewFn)) {
    // Replace all users of the old function with the new function or new
    // instructions. This is not a range loop because the call is deleted.
    for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
      if (CallInst *CI = dyn_cast<CallInst>(*UI++))
        UpgradeIntrinsicCall(CI, NewFn);

    // Remove old function, no longer used, from the module.
    F->eraseFromParent();
  }
}
MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
  // Check if the tag uses struct-path aware TBAA format.
  if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
    return &MD;

  auto &Context = MD.getContext();
  if (MD.getNumOperands() == 3) {
    Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
    MDNode *ScalarType = MDNode::get(Context, Elts);
    // Create a MDNode <ScalarType, ScalarType, offset 0, const>
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(
                             Constant::getNullValue(Type::getInt64Ty(Context))),
                         MD.getOperand(2)};
    return MDNode::get(Context, Elts2);
  }

  // Create a MDNode <MD, MD, offset 0>
  Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
                                    Type::getInt64Ty(Context)))};
  return MDNode::get(Context, Elts);
}
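// Editorial example of the rewrite above: an old two-operand scalar tag such
// as !{!"int", !root} is reused as its own access type, yielding
// !{!oldTag, !oldTag, i64 0}; a three-operand tag additionally carries its
// third ("constant") operand over as the fourth operand of the new node.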
Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
                                      Instruction *&Temp) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Temp = nullptr;
  Type *SrcTy = V->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = V->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);
    Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);

    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
  }

  return nullptr;
}
Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Type *SrcTy = C->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = C->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);
    return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
                                     DestTy);
  }

  return nullptr;
}
/// Check the debug info version number; if it is out-dated, drop the debug
/// info. Return true if the module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  unsigned Version = getDebugMetadataVersionFromModule(M);
  if (Version == DEBUG_METADATA_VERSION) {
    bool BrokenDebugInfo = false;
    if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
      report_fatal_error("Broken module found, compilation aborted!");
    if (!BrokenDebugInfo)
      // Everything is ok.
      return false;
    else {
      // Diagnose malformed debug info.
      DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
      M.getContext().diagnose(Diag);
    }
  }
  bool Modified = StripDebugInfo(M);
  if (Modified && Version != DEBUG_METADATA_VERSION) {
    // Diagnose a version mismatch.
    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DiagVersion);
  }
  return Modified;
}
bool llvm::UpgradeRetainReleaseMarker(Module &M) {
  bool Changed = false;
  const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
  NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
  if (ModRetainReleaseMarker) {
    MDNode *Op = ModRetainReleaseMarker->getOperand(0);
    if (Op) {
      MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
      if (ID) {
        SmallVector<StringRef, 4> ValueComp;
        ID->getString().split(ValueComp, "#");
        if (ValueComp.size() == 2) {
          std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
          ID = MDString::get(M.getContext(), NewValue);
        }
        M.addModuleFlag(Module::Error, MarkerKey, ID);
        M.eraseNamedMetadata(ModRetainReleaseMarker);
        Changed = true;
      }
    }
  }

  return Changed;
}
bool llvm::UpgradeModuleFlags(Module &M) {
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(I);
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
    // Upgrade PIC/PIE module flags. The behavior for these two flags used to
    // be Error and is now Max.
    if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
        if (Behavior->getLimitedValue() == Module::Error) {
          Type *Int32Ty = Type::getInt32Ty(M.getContext());
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
              MDString::get(M.getContext(), ID->getString()),
              Op->getOperand(2)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
    // Upgrade the Objective-C Image Info Section. Remove the whitespace in
    // the section name so that llvm-lto will not complain about mismatching
    // module flags that are functionally the same.
    if (ID->getString() == "Objective-C Image Info Section") {
      if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
        SmallVector<StringRef, 4> ValueComp;
        Value->getString().split(ValueComp, " ");
        if (ValueComp.size() != 1) {
          std::string NewValue;
          for (auto &S : ValueComp)
            NewValue += S.str();
          Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
                              MDString::get(M.getContext(), NewValue)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
  }

  // "Objective-C Class Properties" is a recent addition for Objective-C. We
  // upgrade ObjC bitcode to contain an "Objective-C Class Properties" module
  // flag of value 0, so we can correctly downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
                    (uint32_t)0);
    Changed = true;
  }

  return Changed;
}
void llvm::UpgradeSectionAttributes(Module &M) {
  auto TrimSpaces = [](StringRef Section) -> std::string {
    SmallVector<StringRef, 5> Components;
    Section.split(Components, ',');

    SmallString<32> Buffer;
    raw_svector_ostream OS(Buffer);

    for (auto Component : Components)
      OS << ',' << Component.trim();

    return OS.str().substr(1);
  };

  for (auto &GV : M.globals()) {
    if (!GV.hasSection())
      continue;

    StringRef Section = GV.getSection();

    if (!Section.startswith("__DATA, __objc_catlist"))
      continue;

    // __DATA, __objc_catlist, regular, no_dead_strip
    //   -> __DATA,__objc_catlist,regular,no_dead_strip
    GV.setSection(TrimSpaces(Section));
  }
}
static bool isOldLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return false;
  if (T->getNumOperands() < 1)
    return false;
  auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!S)
    return false;
  return S->getString().startswith("llvm.vectorizer.");
}
static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
  StringRef OldPrefix = "llvm.vectorizer.";
  assert(OldTag.startswith(OldPrefix) && "Expected old prefix");

  if (OldTag == "llvm.vectorizer.unroll")
    return MDString::get(C, "llvm.loop.interleave.count");

  return MDString::get(
      C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
             .str());
}
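// Editorial examples of the mapping above:
//   "llvm.vectorizer.width"  -> "llvm.loop.vectorize.width"
//   "llvm.vectorizer.enable" -> "llvm.loop.vectorize.enable"
//   "llvm.vectorizer.unroll" -> "llvm.loop.interleave.count" (special case)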
static Metadata *upgradeLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return MD;
  if (T->getNumOperands() < 1)
    return MD;
  auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!OldTag)
    return MD;
  if (!OldTag->getString().startswith("llvm.vectorizer."))
    return MD;

  // This has an old tag. Upgrade it.
  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
  for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
    Ops.push_back(T->getOperand(I));

  return MDTuple::get(T->getContext(), Ops);
}
MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
  auto *T = dyn_cast<MDTuple>(&N);
  if (!T)
    return &N;

  if (none_of(T->operands(), isOldLoopArgument))
    return &N;

  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  for (Metadata *MD : T->operands())
    Ops.push_back(upgradeLoopArgument(MD));

  return MDTuple::get(T->getContext(), Ops);
}