//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstring>

using namespace llvm;
static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
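
// Renaming the obsolete declaration to "<name>.old" frees up the canonical
// intrinsic name, so the Intrinsic::getDeclaration() calls below can create
// the upgraded declaration under the original name.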
// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
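
// Illustrative IR for the upgrade above (hand-written example, not emitted by
// this function):
//   old: declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
//   new: declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>)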
// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
static bool UpgradeADCSBBIntrinsic(Function *F, Intrinsic::ID IID,
                                   Function *&NewFn) {
  // If this intrinsic has 3 operands, it's the new version.
  if (F->getFunctionType()->getNumParams() == 3)
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsics matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.startswith("sse2.paddus.") || // Added in 8.0
      Name.startswith("sse2.psubus.") || // Added in 8.0
      Name.startswith("avx2.paddus.") || // Added in 8.0
      Name.startswith("avx2.psubus.") || // Added in 8.0
      Name.startswith("avx512.mask.paddus.") || // Added in 8.0
      Name.startswith("avx512.mask.psubus.") || // Added in 8.0
      Name == "ssse3.pabs.b.128" || // Added in 6.0
      Name == "ssse3.pabs.w.128" || // Added in 6.0
      Name == "ssse3.pabs.d.128" || // Added in 6.0
      Name.startswith("fma4.vfmadd.s") || // Added in 7.0
      Name.startswith("fma.vfmadd.") || // Added in 7.0
      Name.startswith("fma.vfmsub.") || // Added in 7.0
      Name.startswith("fma.vfmaddsub.") || // Added in 7.0
      Name.startswith("fma.vfmsubadd.") || // Added in 7.0
      Name.startswith("fma.vfnmadd.") || // Added in 7.0
      Name.startswith("fma.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
      Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
      Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
      Name.startswith("avx512.kunpck") || // Added in 6.0
      Name.startswith("avx2.pabs.") || // Added in 6.0
      Name.startswith("avx512.mask.pabs.") || // Added in 6.0
      Name.startswith("avx512.broadcastm") || // Added in 6.0
      Name == "sse.sqrt.ss" || // Added in 7.0
      Name == "sse2.sqrt.sd" || // Added in 7.0
      Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
      Name.startswith("avx.sqrt.p") || // Added in 7.0
      Name.startswith("sse2.sqrt.p") || // Added in 7.0
      Name.startswith("sse.sqrt.p") || // Added in 7.0
      Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
      Name.startswith("sse2.pcmpeq.") || // Added in 3.1
      Name.startswith("sse2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx2.pcmpeq.") || // Added in 3.1
      Name.startswith("avx2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
      Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
      Name.startswith("avx.vperm2f128.") || // Added in 6.0
      Name == "avx2.vperm2i128" || // Added in 6.0
      Name == "sse.add.ss" || // Added in 4.0
      Name == "sse2.add.sd" || // Added in 4.0
      Name == "sse.sub.ss" || // Added in 4.0
      Name == "sse2.sub.sd" || // Added in 4.0
      Name == "sse.mul.ss" || // Added in 4.0
      Name == "sse2.mul.sd" || // Added in 4.0
      Name == "sse.div.ss" || // Added in 4.0
      Name == "sse2.div.sd" || // Added in 4.0
      Name == "sse41.pmaxsb" || // Added in 3.9
      Name == "sse2.pmaxs.w" || // Added in 3.9
      Name == "sse41.pmaxsd" || // Added in 3.9
      Name == "sse2.pmaxu.b" || // Added in 3.9
      Name == "sse41.pmaxuw" || // Added in 3.9
      Name == "sse41.pmaxud" || // Added in 3.9
      Name == "sse41.pminsb" || // Added in 3.9
      Name == "sse2.pmins.w" || // Added in 3.9
      Name == "sse41.pminsd" || // Added in 3.9
      Name == "sse2.pminu.b" || // Added in 3.9
      Name == "sse41.pminuw" || // Added in 3.9
      Name == "sse41.pminud" || // Added in 3.9
      Name == "avx512.kand.w" || // Added in 7.0
      Name == "avx512.kandn.w" || // Added in 7.0
      Name == "avx512.knot.w" || // Added in 7.0
      Name == "avx512.kor.w" || // Added in 7.0
      Name == "avx512.kxor.w" || // Added in 7.0
      Name == "avx512.kxnor.w" || // Added in 7.0
      Name == "avx512.kortestc.w" || // Added in 7.0
      Name == "avx512.kortestz.w" || // Added in 7.0
      Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
      Name.startswith("avx2.pmax") || // Added in 3.9
      Name.startswith("avx2.pmin") || // Added in 3.9
      Name.startswith("avx512.mask.pmax") || // Added in 4.0
      Name.startswith("avx512.mask.pmin") || // Added in 4.0
      Name.startswith("avx2.vbroadcast") || // Added in 3.8
      Name.startswith("avx2.pbroadcast") || // Added in 3.8
      Name.startswith("avx.vpermil.") || // Added in 3.1
      Name.startswith("sse2.pshuf") || // Added in 3.9
      Name.startswith("avx512.pbroadcast") || // Added in 3.9
      Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
      Name.startswith("avx512.mask.movddup") || // Added in 3.9
      Name.startswith("avx512.mask.movshdup") || // Added in 3.9
      Name.startswith("avx512.mask.movsldup") || // Added in 3.9
      Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
      Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
      Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
      Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
      Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
      Name.startswith("avx512.mask.punpckl") || // Added in 3.9
      Name.startswith("avx512.mask.punpckh") || // Added in 3.9
      Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
      Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
      Name.startswith("avx512.mask.pand.") || // Added in 3.9
      Name.startswith("avx512.mask.pandn.") || // Added in 3.9
      Name.startswith("avx512.mask.por.") || // Added in 3.9
      Name.startswith("avx512.mask.pxor.") || // Added in 3.9
      Name.startswith("avx512.mask.and.") || // Added in 3.9
      Name.startswith("avx512.mask.andn.") || // Added in 3.9
      Name.startswith("avx512.mask.or.") || // Added in 3.9
      Name.startswith("avx512.mask.xor.") || // Added in 3.9
      Name.startswith("avx512.mask.padd.") || // Added in 4.0
      Name.startswith("avx512.mask.psub.") || // Added in 4.0
      Name.startswith("avx512.mask.pmull.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
      Name == "avx512.mask.cvtudq2ps.128" || // Added in 7.0
      Name == "avx512.mask.cvtudq2ps.256" || // Added in 7.0
      Name == "avx512.mask.cvtqq2pd.128" || // Added in 7.0
      Name == "avx512.mask.cvtqq2pd.256" || // Added in 7.0
      Name == "avx512.mask.cvtuqq2pd.128" || // Added in 7.0
      Name == "avx512.mask.cvtuqq2pd.256" || // Added in 7.0
      Name == "avx512.mask.cvtdq2ps.128" || // Added in 7.0
      Name == "avx512.mask.cvtdq2ps.256" || // Added in 7.0
      Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
      Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
      Name == "avx512.cvtusi2sd" || // Added in 7.0
      Name.startswith("avx512.mask.permvar.") || // Added in 7.0
      Name == "sse2.pmulu.dq" || // Added in 7.0
      Name == "sse41.pmuldq" || // Added in 7.0
      Name == "avx2.pmulu.dq" || // Added in 7.0
      Name == "avx2.pmul.dq" || // Added in 7.0
      Name == "avx512.pmulu.dq.512" || // Added in 7.0
      Name == "avx512.pmul.dq.512" || // Added in 7.0
      Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
      Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
      Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
      Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
      Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
      Name.startswith("avx512.mask.psll.d") || // Added in 4.0
      Name.startswith("avx512.mask.psll.q") || // Added in 4.0
      Name.startswith("avx512.mask.psll.w") || // Added in 4.0
      Name.startswith("avx512.mask.psra.d") || // Added in 4.0
      Name.startswith("avx512.mask.psra.q") || // Added in 4.0
      Name.startswith("avx512.mask.psra.w") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
      Name.startswith("avx512.mask.pslli") || // Added in 4.0
      Name.startswith("avx512.mask.psrai") || // Added in 4.0
      Name.startswith("avx512.mask.psrli") || // Added in 4.0
      Name.startswith("avx512.mask.psllv") || // Added in 4.0
      Name.startswith("avx512.mask.psrav") || // Added in 4.0
      Name.startswith("avx512.mask.psrlv") || // Added in 4.0
      Name.startswith("sse41.pmovsx") || // Added in 3.8
      Name.startswith("sse41.pmovzx") || // Added in 3.9
      Name.startswith("avx2.pmovsx") || // Added in 3.9
      Name.startswith("avx2.pmovzx") || // Added in 3.9
      Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
      Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
      Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
      Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
      Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
      Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
      Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
      Name.startswith("avx512.mask.prorv.") || // Added in 7.0
      Name.startswith("avx512.mask.pror.") || // Added in 7.0
      Name.startswith("avx512.mask.prolv.") || // Added in 7.0
      Name.startswith("avx512.mask.prol.") || // Added in 7.0
      Name.startswith("avx512.mask.padds.") || // Added in 8.0
      Name.startswith("avx512.mask.psubs.") || // Added in 8.0
      Name == "sse.cvtsi2ss" || // Added in 7.0
      Name == "sse.cvtsi642ss" || // Added in 7.0
      Name == "sse2.cvtsi2sd" || // Added in 7.0
      Name == "sse2.cvtsi642sd" || // Added in 7.0
      Name == "sse2.cvtss2sd" || // Added in 7.0
      Name == "sse2.cvtdq2pd" || // Added in 3.9
      Name == "sse2.cvtdq2ps" || // Added in 7.0
      Name == "sse2.cvtps2pd" || // Added in 3.9
      Name == "avx.cvtdq2.pd.256" || // Added in 3.9
      Name == "avx.cvtdq2.ps.256" || // Added in 7.0
      Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
      Name.startswith("avx.vinsertf128.") || // Added in 3.7
      Name == "avx2.vinserti128" || // Added in 3.7
      Name.startswith("avx512.mask.insert") || // Added in 4.0
      Name.startswith("avx.vextractf128.") || // Added in 3.7
      Name == "avx2.vextracti128" || // Added in 3.7
      Name.startswith("avx512.mask.vextract") || // Added in 4.0
      Name.startswith("sse4a.movnt.") || // Added in 3.9
      Name.startswith("avx.movnt.") || // Added in 3.2
      Name.startswith("avx512.storent.") || // Added in 3.9
      Name == "sse41.movntdqa" || // Added in 5.0
      Name == "avx2.movntdqa" || // Added in 5.0
      Name == "avx512.movntdqa" || // Added in 5.0
      Name == "sse2.storel.dq" || // Added in 3.9
      Name.startswith("sse.storeu.") || // Added in 3.9
      Name.startswith("sse2.storeu.") || // Added in 3.9
      Name.startswith("avx.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.store.p") || // Added in 3.9
      Name.startswith("avx512.mask.store.b.") || // Added in 3.9
      Name.startswith("avx512.mask.store.w.") || // Added in 3.9
      Name.startswith("avx512.mask.store.d.") || // Added in 3.9
      Name.startswith("avx512.mask.store.q.") || // Added in 3.9
      Name == "avx512.mask.store.ss" || // Added in 7.0
      Name.startswith("avx512.mask.loadu.") || // Added in 3.9
      Name.startswith("avx512.mask.load.") || // Added in 3.9
      Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
      Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
      Name == "sse42.crc32.64.8" || // Added in 3.4
      Name.startswith("avx.vbroadcast.s") || // Added in 3.5
      Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
      Name.startswith("avx512.mask.palignr.") || // Added in 3.9
      Name.startswith("avx512.mask.valign.") || // Added in 4.0
      Name.startswith("sse2.psll.dq") || // Added in 3.7
      Name.startswith("sse2.psrl.dq") || // Added in 3.7
      Name.startswith("avx2.psll.dq") || // Added in 3.7
      Name.startswith("avx2.psrl.dq") || // Added in 3.7
      Name.startswith("avx512.psll.dq") || // Added in 3.9
      Name.startswith("avx512.psrl.dq") || // Added in 3.9
      Name == "sse41.pblendw" || // Added in 3.7
      Name.startswith("sse41.blendp") || // Added in 3.7
      Name.startswith("avx.blend.p") || // Added in 3.7
      Name == "avx2.pblendw" || // Added in 3.7
      Name.startswith("avx2.pblendd.") || // Added in 3.7
      Name.startswith("avx.vbroadcastf128") || // Added in 4.0
      Name == "avx2.vbroadcasti128" || // Added in 3.7
      Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
      Name == "xop.vpcmov" || // Added in 3.8
      Name == "xop.vpcmov.256" || // Added in 5.0
      Name.startswith("avx512.mask.move.s") || // Added in 4.0
      Name.startswith("avx512.cvtmask2") || // Added in 5.0
      (Name.startswith("xop.vpcom") && // Added in 3.2
       F->arg_size() == 2) ||
      Name.startswith("avx512.ptestm") || // Added in 6.0
      Name.startswith("avx512.ptestnm") || // Added in 6.0
      Name.startswith("sse2.pavg") || // Added in 6.0
      Name.startswith("avx2.pavg") || // Added in 6.0
      Name.startswith("avx512.mask.pavg")) // Added in 6.0
    return true;

  return false;
}
static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.startswith("x86."))
    return false;
  // Remove "x86." prefix.
  Name = Name.substr(4);

  if (ShouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "addcarryx.u32")
    return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_addcarryx_u32, NewFn);
  if (Name == "addcarryx.u64")
    return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_addcarryx_u64, NewFn);
  if (Name == "addcarry.u32")
    return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_addcarry_u32, NewFn);
  if (Name == "addcarry.u64")
    return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_addcarry_u64, NewFn);
  if (Name == "subborrow.u32")
    return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_subborrow_u32, NewFn);
  if (Name == "subborrow.u64")
    return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_subborrow_u64, NewFn);

  if (Name == "rdtscp") {
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_rdtscp);
    return true;
  }

  // SSE4.1 ptest functions may have an old signature.
  if (Name.startswith("sse41.ptest")) { // Added in 3.2
    if (Name.substr(11) == "c")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name.substr(11) == "z")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name.substr(11) == "nzc")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
  }
  // Several blend and other instructions with masks used the wrong number of
  // bits.
  if (Name == "sse41.insertps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                            NewFn);
  if (Name == "sse41.dppd") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                            NewFn);
  if (Name == "sse41.dpps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                            NewFn);
  if (Name == "sse41.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                            NewFn);
  if (Name == "avx.dp.ps.256") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                            NewFn);
  if (Name == "avx2.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                            NewFn);

  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
    return true;
  }
  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
    return true;
  }
  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
    auto Idx = F->getFunctionType()->getParamType(2);
    if (Idx->isFPOrFPVectorTy()) {
      rename(F);
      unsigned IdxSize = Idx->getPrimitiveSizeInBits();
      unsigned EltSize = Idx->getScalarSizeInBits();
      Intrinsic::ID Permil2ID;
      if (EltSize == 64 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd;
      else if (EltSize == 32 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2ps;
      else if (EltSize == 64 && IdxSize == 256)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
      else
        Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
      NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
      return true;
    }
  }

  return false;
}
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."

  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vclz")) {
      Type* args[2] = {
        F->arg_begin()->getType(),
        Type::getInt1Ty(F->getContext())
      };
      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
      // the end of the name. Change name from llvm.arm.neon.vclz.* to
      //  llvm.ctlz.*
      FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                               "llvm.ctlz." + Name.substr(14), F->getParent());
      return true;
    }
    if (Name.startswith("arm.neon.vcnt")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vldRegex.match(Name)) {
      auto fArgs = F->getFunctionType()->params();
      SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
      // Can't use Intrinsic::getDeclaration here as the return types might
      // then only be structurally equal.
      FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                               "llvm." + Name + ".p0i8", F->getParent());
      return true;
    }
    Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vstRegex.match(Name)) {
      static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                                Intrinsic::arm_neon_vst2,
                                                Intrinsic::arm_neon_vst3,
                                                Intrinsic::arm_neon_vst4};

      static const Intrinsic::ID StoreLaneInts[] = {
        Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
        Intrinsic::arm_neon_vst4lane
      };

      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
      if (Name.find("lane") == StringRef::npos)
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreInts[fArgs.size() - 3], Tys);
      else
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreLaneInts[fArgs.size() - 5],
                                          Tys);
      return true;
    }
    if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::thread_pointer);
      return true;
    }
    break;
  }

  case 'c': {
    if (Name.startswith("ctlz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("cttz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                        F->arg_begin()->getType());
      return true;
    }
    break;
  }
  case 'd': {
    if (Name == "dbg.value" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
      return true;
    }
    break;
  }
  case 'i':
  case 'l': {
    bool IsLifetimeStart = Name.startswith("lifetime.start");
    if (IsLifetimeStart || Name.startswith("invariant.start")) {
      Intrinsic::ID ID = IsLifetimeStart ?
        Intrinsic::lifetime_start : Intrinsic::invariant_start;
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[1]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }

    bool IsLifetimeEnd = Name.startswith("lifetime.end");
    if (IsLifetimeEnd || Name.startswith("invariant.end")) {
      Intrinsic::ID ID = IsLifetimeEnd ?
        Intrinsic::lifetime_end : Intrinsic::invariant_end;

      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }
    if (Name.startswith("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
          Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }

    break;
  }
  case 'm': {
    if (Name.startswith("masked.load.")) {
      Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_load,
                                          Tys);
        return true;
      }
    }
    if (Name.startswith("masked.store.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = { Args[0], Args[1] };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_store,
                                          Tys);
        return true;
      }
    }
    // Renaming gather/scatter intrinsics with no address space overloading
    // to the new overload which includes an address space.
    if (Name.startswith("masked.gather.")) {
      Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_gather, Tys);
        return true;
      }
    }
    if (Name.startswith("masked.scatter.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {Args[0], Args[1]};
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_scatter, Tys);
        return true;
      }
    }
    // Update the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to embed the alignment as an attribute of
    // the pointer args.
    if (Name.startswith("memcpy.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, src, and len
      ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
                                        ParamTypes);
      return true;
    }
    if (Name.startswith("memmove.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, src, and len
      ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
                                        ParamTypes);
      return true;
    }
    if (Name.startswith("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
                                        ParamTypes);
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.startswith("nvvm.")) {
      Name = Name.substr(5);

      // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
      Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
                              .Cases("brev32", "brev64", Intrinsic::bitreverse)
                              .Case("clz.i", Intrinsic::ctlz)
                              .Case("popc.i", Intrinsic::ctpop)
                              .Default(Intrinsic::not_intrinsic);
      if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
                                          {F->getReturnType()});
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom,
      // but not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = StringSwitch<bool>(Name)
                        .Cases("abs.i", "abs.ll", true)
                        .Cases("clz.ll", "popc.ll", "h2f", true)
                        .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
                        .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
                        .Default(false);
      if (Expand) {
        NewFn = nullptr;
        return true;
      }
    }
    break;
  }
  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (Name.startswith("objectsize.")) {
      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->arg_size() == 2 ||
          F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'x':
    if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }
  // Remangle our intrinsic since we upgrade the mangling
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != None) {
    NewFn = Result.getValue();
    return true;
  }

  //  This may not belong here. This function is effectively being overloaded
  //  to both detect an intrinsic which needs upgrading, and to provide the
  //  upgraded form of the intrinsic. We should perhaps have two separate
  //  functions for this.
  return false;
}
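
// Note: UpgradeIntrinsicFunction1 can return true while leaving NewFn null.
// That means the declaration itself is unchanged, but every call site still
// needs rewriting; the !NewFn path in UpgradeIntrinsicCall handles that case.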
bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  NewFn = nullptr;
  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}
bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Nothing to do yet.
  return false;
}
// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
                                         Value *Op, unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
                             cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
    uint32_t Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask,
                                       makeArrayRef(Indices, NumElts),
                                       "extract");
  }

  return Mask;
}
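
// For example, an i8 mask guarding a <4 x i32> operation is bitcast to
// <8 x i1> and then narrowed by the shuffle above to keep only lanes 0..3.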
static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
                            Value *Op0, Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
  return Builder.CreateSelect(Mask, Op0, Op1);
}
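
// The result is plain IR; e.g. a 16-bit mask over <16 x float> becomes:
//   %m = bitcast i16 %mask to <16 x i1>
//   %r = select <16 x i1> %m, <16 x float> %op0, <16 x float> %op1
// (value names illustrative).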
static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
                                  Value *Op0, Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  llvm::VectorType *MaskTy =
    llvm::VectorType::get(Builder.getInt1Ty(),
                          Mask->getType()->getIntegerBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);
  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
  return Builder.CreateSelect(Mask, Op0, Op1);
}
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the
// immediate so we need to handle both cases. VALIGN also doesn't have 128-bit
// lanes.
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = Op0->getType()->getVectorNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  uint32_t Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),
                                             "palignr");

  return EmitX86Select(Builder, Mask, Align, Passthru);
}
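
// For example, a 128-bit PALIGNR with ShiftVal == 4 selects bytes 4..15 of
// Op1 followed by bytes 0..3 of Op0, i.e. the concatenation Op0:Op1 shifted
// right by four bytes.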
static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
                                            bool IsAddition) {
  Value *Op0 = CI.getOperand(0);
  Value *Op1 = CI.getOperand(1);

  // Collect vector elements and type data.
  Type *ResultType = CI.getType();

  Value *Res;
  if (IsAddition) {
    // ADDUS: a > (a+b) ? ~0 : (a+b)
    // If Op0 > Add, overflow occurred.
    Value *Add = Builder.CreateAdd(Op0, Op1);
    Value *ICmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Op0, Add);
    Value *Max = llvm::Constant::getAllOnesValue(ResultType);
    Res = Builder.CreateSelect(ICmp, Max, Add);
  } else {
    // SUBUS: max(a, b) - b
    Value *ICmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Op0, Op1);
    Value *Select = Builder.CreateSelect(ICmp, Op0, Op1);
    Res = Builder.CreateSub(Select, Op1);
  }

  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
    Value *VecSrc = CI.getOperand(2);
    Value *Mask = CI.getOperand(3);
    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}
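
// Sanity check with i8 lanes: ADDUS(200, 100) computes Add == 44 (wrapped),
// 200 ugt 44 detects the overflow, and the select yields 255. SUBUS(100, 200)
// selects max(100, 200) == 200 and returns 200 - 200 == 0, i.e. saturation
// at zero.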
static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
                                 Value *Ptr, Value *Data, Value *Mask,
                                 bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Data->getType()));
  unsigned Align =
    Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Data, Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Data->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
}
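
// Illustrative result (types and mangled name assumed for the example): a
// masked store of a <16 x float> value with Aligned set becomes
//   call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> %data,
//       <16 x float>* %ptr, i32 64, <16 x i1> %mask)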
static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
                                Value *Ptr, Value *Passthru, Value *Mask,
                                bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Passthru->getType()));
  unsigned Align =
    Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Passthru->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
}
static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
  Value *Op0 = CI.getArgOperand(0);
  llvm::Type *Ty = Op0->getType();
  Value *Zero = llvm::Constant::getNullValue(Ty);
  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
  Value *Neg = Builder.CreateNeg(Op0);
  Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);

  if (CI.getNumArgOperands() == 3)
    Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));

  return Res;
}
static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
                               ICmpInst::Predicate Pred) {
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
  Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);

  if (CI.getNumArgOperands() == 4)
    Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));

  return Res;
}
static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
  Type *Ty = CI.getType();

  // Arguments have a vXi32 type so cast to vXi64.
  Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
  Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);

  if (IsSigned) {
    // Shift left then arithmetic shift right.
    Constant *ShiftAmt = ConstantInt::get(Ty, 32);
    LHS = Builder.CreateShl(LHS, ShiftAmt);
    LHS = Builder.CreateAShr(LHS, ShiftAmt);
    RHS = Builder.CreateShl(RHS, ShiftAmt);
    RHS = Builder.CreateAShr(RHS, ShiftAmt);
  } else {
    // Clear the upper bits.
    Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
    LHS = Builder.CreateAnd(LHS, Mask);
    RHS = Builder.CreateAnd(RHS, Mask);
  }

  Value *Res = Builder.CreateMul(LHS, RHS);

  if (CI.getNumArgOperands() == 4)
    Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));

  return Res;
}
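
// E.g. for a 64-bit lane holding 0x00000000ffffffff: the shl/ashr pair
// sign-extends it to -1 on the signed path, while the unsigned path's mask
// keeps it as 0xffffffff; the 64-bit multiply then matches PMULDQ/PMULUDQ
// semantics.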
// Apply a mask to a vector of i1s and make sure the result is at least 8 bits
// wide.
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask) {
  unsigned NumElts = Vec->getType()->getVectorNumElements();
  if (Mask) {
    const auto *C = dyn_cast<Constant>(Mask);
    if (!C || !C->isAllOnesValue())
      Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  }

  if (NumElts < 8) {
    uint32_t Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(Vec,
                                      Constant::getNullValue(Vec->getType()),
                                      Indices);
  }
  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}
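
// E.g. a <4 x i1> compare result is padded with four zero lanes and bitcast
// to i8, so callers always receive a mask that is at least byte-sized.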
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
                                   unsigned CC, bool Signed) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = Op0->getType()->getVectorNumElements();

  Value *Cmp;
  if (CC == 3) {
    Cmp = Constant::getNullValue(
        llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    Cmp = Constant::getAllOnesValue(
        llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ;  break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE;  break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
  }

  Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);

  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
}
// Replace a masked intrinsic with an older unmasked intrinsic.
static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
                                    Intrinsic::ID IID) {
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
  Value *Rep = Builder.CreateCall(Intrin,
                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
  return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
}
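
// E.g. a masked 128-bit psll.d upgrade calls the unmasked SSE2 intrinsic on
// the first two operands and then blends with the passthru operand using the
// mask operand (operand order as in the old avx512.mask.* signature).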
static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
  Value* A = CI.getArgOperand(0);
  Value* B = CI.getArgOperand(1);
  Value* Src = CI.getArgOperand(2);
  Value* Mask = CI.getArgOperand(3);

  Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
  Value* Cmp = Builder.CreateIsNotNull(AndNode);
  Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
  Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
  Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
}
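
// Only bit 0 of the i8 mask matters here: it picks between element 0 of B and
// element 0 of Src, and the chosen scalar is inserted into lane 0 of A.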
static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
  Value* Op = CI.getArgOperand(0);
  Type* ReturnOp = CI.getType();
  unsigned NumElts = CI.getType()->getVectorNumElements();
  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
}
1156 // Replace intrinsic with unmasked version and a select.
1157 static bool upgradeAVX512MaskToSelect(StringRef Name
, IRBuilder
<> &Builder
,
1158 CallInst
&CI
, Value
*&Rep
) {
1159 Name
= Name
.substr(12); // Remove avx512.mask.
1161 unsigned VecWidth
= CI
.getType()->getPrimitiveSizeInBits();
1162 unsigned EltWidth
= CI
.getType()->getScalarSizeInBits();
1164 if (Name
.startswith("max.p")) {
1165 if (VecWidth
== 128 && EltWidth
== 32)
1166 IID
= Intrinsic::x86_sse_max_ps
;
1167 else if (VecWidth
== 128 && EltWidth
== 64)
1168 IID
= Intrinsic::x86_sse2_max_pd
;
1169 else if (VecWidth
== 256 && EltWidth
== 32)
1170 IID
= Intrinsic::x86_avx_max_ps_256
;
1171 else if (VecWidth
== 256 && EltWidth
== 64)
1172 IID
= Intrinsic::x86_avx_max_pd_256
;
1174 llvm_unreachable("Unexpected intrinsic");
1175 } else if (Name
.startswith("min.p")) {
1176 if (VecWidth
== 128 && EltWidth
== 32)
1177 IID
= Intrinsic::x86_sse_min_ps
;
1178 else if (VecWidth
== 128 && EltWidth
== 64)
1179 IID
= Intrinsic::x86_sse2_min_pd
;
1180 else if (VecWidth
== 256 && EltWidth
== 32)
1181 IID
= Intrinsic::x86_avx_min_ps_256
;
1182 else if (VecWidth
== 256 && EltWidth
== 64)
1183 IID
= Intrinsic::x86_avx_min_pd_256
;
1185 llvm_unreachable("Unexpected intrinsic");
1186 } else if (Name
.startswith("pshuf.b.")) {
1187 if (VecWidth
== 128)
1188 IID
= Intrinsic::x86_ssse3_pshuf_b_128
;
1189 else if (VecWidth
== 256)
1190 IID
= Intrinsic::x86_avx2_pshuf_b
;
1191 else if (VecWidth
== 512)
1192 IID
= Intrinsic::x86_avx512_pshuf_b_512
;
1194 llvm_unreachable("Unexpected intrinsic");
1195 } else if (Name
.startswith("pmul.hr.sw.")) {
1196 if (VecWidth
== 128)
1197 IID
= Intrinsic::x86_ssse3_pmul_hr_sw_128
;
1198 else if (VecWidth
== 256)
1199 IID
= Intrinsic::x86_avx2_pmul_hr_sw
;
1200 else if (VecWidth
== 512)
1201 IID
= Intrinsic::x86_avx512_pmul_hr_sw_512
;
1203 llvm_unreachable("Unexpected intrinsic");
1204 } else if (Name
.startswith("pmulh.w.")) {
1205 if (VecWidth
== 128)
1206 IID
= Intrinsic::x86_sse2_pmulh_w
;
1207 else if (VecWidth
== 256)
1208 IID
= Intrinsic::x86_avx2_pmulh_w
;
1209 else if (VecWidth
== 512)
1210 IID
= Intrinsic::x86_avx512_pmulh_w_512
;
1212 llvm_unreachable("Unexpected intrinsic");
1213 } else if (Name
.startswith("pmulhu.w.")) {
1214 if (VecWidth
== 128)
1215 IID
= Intrinsic::x86_sse2_pmulhu_w
;
1216 else if (VecWidth
== 256)
1217 IID
= Intrinsic::x86_avx2_pmulhu_w
;
1218 else if (VecWidth
== 512)
1219 IID
= Intrinsic::x86_avx512_pmulhu_w_512
;
1221 llvm_unreachable("Unexpected intrinsic");
1222 } else if (Name
.startswith("pmaddw.d.")) {
1223 if (VecWidth
== 128)
1224 IID
= Intrinsic::x86_sse2_pmadd_wd
;
1225 else if (VecWidth
== 256)
1226 IID
= Intrinsic::x86_avx2_pmadd_wd
;
1227 else if (VecWidth
== 512)
1228 IID
= Intrinsic::x86_avx512_pmaddw_d_512
;
1230 llvm_unreachable("Unexpected intrinsic");
1231 } else if (Name
.startswith("pmaddubs.w.")) {
1232 if (VecWidth
== 128)
1233 IID
= Intrinsic::x86_ssse3_pmadd_ub_sw_128
;
1234 else if (VecWidth
== 256)
1235 IID
= Intrinsic::x86_avx2_pmadd_ub_sw
;
1236 else if (VecWidth
== 512)
1237 IID
= Intrinsic::x86_avx512_pmaddubs_w_512
;
1239 llvm_unreachable("Unexpected intrinsic");
1240 } else if (Name
.startswith("packsswb.")) {
1241 if (VecWidth
== 128)
1242 IID
= Intrinsic::x86_sse2_packsswb_128
;
1243 else if (VecWidth
== 256)
1244 IID
= Intrinsic::x86_avx2_packsswb
;
1245 else if (VecWidth
== 512)
1246 IID
= Intrinsic::x86_avx512_packsswb_512
;
1248 llvm_unreachable("Unexpected intrinsic");
1249 } else if (Name
.startswith("packssdw.")) {
1250 if (VecWidth
== 128)
1251 IID
= Intrinsic::x86_sse2_packssdw_128
;
1252 else if (VecWidth
== 256)
1253 IID
= Intrinsic::x86_avx2_packssdw
;
1254 else if (VecWidth
== 512)
1255 IID
= Intrinsic::x86_avx512_packssdw_512
;
1257 llvm_unreachable("Unexpected intrinsic");
1258 } else if (Name
.startswith("packuswb.")) {
1259 if (VecWidth
== 128)
1260 IID
= Intrinsic::x86_sse2_packuswb_128
;
1261 else if (VecWidth
== 256)
1262 IID
= Intrinsic::x86_avx2_packuswb
;
1263 else if (VecWidth
== 512)
1264 IID
= Intrinsic::x86_avx512_packuswb_512
;
1266 llvm_unreachable("Unexpected intrinsic");
1267 } else if (Name
.startswith("packusdw.")) {
1268 if (VecWidth
== 128)
1269 IID
= Intrinsic::x86_sse41_packusdw
;
1270 else if (VecWidth
== 256)
1271 IID
= Intrinsic::x86_avx2_packusdw
;
1272 else if (VecWidth
== 512)
1273 IID
= Intrinsic::x86_avx512_packusdw_512
;
1275 llvm_unreachable("Unexpected intrinsic");
1276 } else if (Name
.startswith("vpermilvar.")) {
1277 if (VecWidth
== 128 && EltWidth
== 32)
1278 IID
= Intrinsic::x86_avx_vpermilvar_ps
;
1279 else if (VecWidth
== 128 && EltWidth
== 64)
1280 IID
= Intrinsic::x86_avx_vpermilvar_pd
;
1281 else if (VecWidth
== 256 && EltWidth
== 32)
1282 IID
= Intrinsic::x86_avx_vpermilvar_ps_256
;
1283 else if (VecWidth
== 256 && EltWidth
== 64)
1284 IID
= Intrinsic::x86_avx_vpermilvar_pd_256
;
1285 else if (VecWidth
== 512 && EltWidth
== 32)
1286 IID
= Intrinsic::x86_avx512_vpermilvar_ps_512
;
1287 else if (VecWidth
== 512 && EltWidth
== 64)
1288 IID
= Intrinsic::x86_avx512_vpermilvar_pd_512
;
1290 llvm_unreachable("Unexpected intrinsic");
1291 } else if (Name
== "cvtpd2dq.256") {
1292 IID
= Intrinsic::x86_avx_cvt_pd2dq_256
;
1293 } else if (Name
== "cvtpd2ps.256") {
1294 IID
= Intrinsic::x86_avx_cvt_pd2_ps_256
;
1295 } else if (Name
== "cvttpd2dq.256") {
1296 IID
= Intrinsic::x86_avx_cvtt_pd2dq_256
;
1297 } else if (Name
== "cvttps2dq.128") {
1298 IID
= Intrinsic::x86_sse2_cvttps2dq
;
1299 } else if (Name
== "cvttps2dq.256") {
1300 IID
= Intrinsic::x86_avx_cvtt_ps2dq_256
;
1301 } else if (Name
.startswith("permvar.")) {
1302 bool IsFloat
= CI
.getType()->isFPOrFPVectorTy();
1303 if (VecWidth
== 256 && EltWidth
== 32 && IsFloat
)
1304 IID
= Intrinsic::x86_avx2_permps
;
1305 else if (VecWidth
== 256 && EltWidth
== 32 && !IsFloat
)
1306 IID
= Intrinsic::x86_avx2_permd
;
1307 else if (VecWidth
== 256 && EltWidth
== 64 && IsFloat
)
1308 IID
= Intrinsic::x86_avx512_permvar_df_256
;
1309 else if (VecWidth
== 256 && EltWidth
== 64 && !IsFloat
)
1310 IID
= Intrinsic::x86_avx512_permvar_di_256
;
1311 else if (VecWidth
== 512 && EltWidth
== 32 && IsFloat
)
1312 IID
= Intrinsic::x86_avx512_permvar_sf_512
;
1313 else if (VecWidth
== 512 && EltWidth
== 32 && !IsFloat
)
1314 IID
= Intrinsic::x86_avx512_permvar_si_512
;
1315 else if (VecWidth
== 512 && EltWidth
== 64 && IsFloat
)
1316 IID
= Intrinsic::x86_avx512_permvar_df_512
;
1317 else if (VecWidth
== 512 && EltWidth
== 64 && !IsFloat
)
1318 IID
= Intrinsic::x86_avx512_permvar_di_512
;
1319 else if (VecWidth
== 128 && EltWidth
== 16)
1320 IID
= Intrinsic::x86_avx512_permvar_hi_128
;
1321 else if (VecWidth
== 256 && EltWidth
== 16)
1322 IID
= Intrinsic::x86_avx512_permvar_hi_256
;
1323 else if (VecWidth
== 512 && EltWidth
== 16)
1324 IID
= Intrinsic::x86_avx512_permvar_hi_512
;
1325 else if (VecWidth
== 128 && EltWidth
== 8)
1326 IID
= Intrinsic::x86_avx512_permvar_qi_128
;
1327 else if (VecWidth
== 256 && EltWidth
== 8)
1328 IID
= Intrinsic::x86_avx512_permvar_qi_256
;
1329 else if (VecWidth
== 512 && EltWidth
== 8)
1330 IID
= Intrinsic::x86_avx512_permvar_qi_512
;
1332 llvm_unreachable("Unexpected intrinsic");
1333 } else if (Name
.startswith("dbpsadbw.")) {
1334 if (VecWidth
== 128)
1335 IID
= Intrinsic::x86_avx512_dbpsadbw_128
;
1336 else if (VecWidth
== 256)
1337 IID
= Intrinsic::x86_avx512_dbpsadbw_256
;
1338 else if (VecWidth
== 512)
1339 IID
= Intrinsic::x86_avx512_dbpsadbw_512
;
1341 llvm_unreachable("Unexpected intrinsic");
1342 } else if (Name
.startswith("vpshld.")) {
1343 if (VecWidth
== 128 && Name
[7] == 'q')
1344 IID
= Intrinsic::x86_avx512_vpshld_q_128
;
1345 else if (VecWidth
== 128 && Name
[7] == 'd')
1346 IID
= Intrinsic::x86_avx512_vpshld_d_128
;
1347 else if (VecWidth
== 128 && Name
[7] == 'w')
1348 IID
= Intrinsic::x86_avx512_vpshld_w_128
;
1349 else if (VecWidth
== 256 && Name
[7] == 'q')
1350 IID
= Intrinsic::x86_avx512_vpshld_q_256
;
1351 else if (VecWidth
== 256 && Name
[7] == 'd')
1352 IID
= Intrinsic::x86_avx512_vpshld_d_256
;
1353 else if (VecWidth
== 256 && Name
[7] == 'w')
1354 IID
= Intrinsic::x86_avx512_vpshld_w_256
;
1355 else if (VecWidth
== 512 && Name
[7] == 'q')
1356 IID
= Intrinsic::x86_avx512_vpshld_q_512
;
1357 else if (VecWidth
== 512 && Name
[7] == 'd')
1358 IID
= Intrinsic::x86_avx512_vpshld_d_512
;
1359 else if (VecWidth
== 512 && Name
[7] == 'w')
1360 IID
= Intrinsic::x86_avx512_vpshld_w_512
;
1362 llvm_unreachable("Unexpected intrinsic");
1363 } else if (Name
.startswith("vpshrd.")) {
1364 if (VecWidth
== 128 && Name
[7] == 'q')
1365 IID
= Intrinsic::x86_avx512_vpshrd_q_128
;
1366 else if (VecWidth
== 128 && Name
[7] == 'd')
1367 IID
= Intrinsic::x86_avx512_vpshrd_d_128
;
1368 else if (VecWidth
== 128 && Name
[7] == 'w')
1369 IID
= Intrinsic::x86_avx512_vpshrd_w_128
;
1370 else if (VecWidth
== 256 && Name
[7] == 'q')
1371 IID
= Intrinsic::x86_avx512_vpshrd_q_256
;
1372 else if (VecWidth
== 256 && Name
[7] == 'd')
1373 IID
= Intrinsic::x86_avx512_vpshrd_d_256
;
1374 else if (VecWidth
== 256 && Name
[7] == 'w')
1375 IID
= Intrinsic::x86_avx512_vpshrd_w_256
;
1376 else if (VecWidth
== 512 && Name
[7] == 'q')
1377 IID
= Intrinsic::x86_avx512_vpshrd_q_512
;
1378 else if (VecWidth
== 512 && Name
[7] == 'd')
1379 IID
= Intrinsic::x86_avx512_vpshrd_d_512
;
1380 else if (VecWidth
== 512 && Name
[7] == 'w')
1381 IID
= Intrinsic::x86_avx512_vpshrd_w_512
;
1383 llvm_unreachable("Unexpected intrinsic");
1384 } else if (Name
.startswith("prorv.")) {
1385 if (VecWidth
== 128 && EltWidth
== 32)
1386 IID
= Intrinsic::x86_avx512_prorv_d_128
;
1387 else if (VecWidth
== 256 && EltWidth
== 32)
1388 IID
= Intrinsic::x86_avx512_prorv_d_256
;
1389 else if (VecWidth
== 512 && EltWidth
== 32)
1390 IID
= Intrinsic::x86_avx512_prorv_d_512
;
1391 else if (VecWidth
== 128 && EltWidth
== 64)
1392 IID
= Intrinsic::x86_avx512_prorv_q_128
;
1393 else if (VecWidth
== 256 && EltWidth
== 64)
1394 IID
= Intrinsic::x86_avx512_prorv_q_256
;
1395 else if (VecWidth
== 512 && EltWidth
== 64)
1396 IID
= Intrinsic::x86_avx512_prorv_q_512
;
1398 llvm_unreachable("Unexpected intrinsic");
1399 } else if (Name
.startswith("prolv.")) {
1400 if (VecWidth
== 128 && EltWidth
== 32)
1401 IID
= Intrinsic::x86_avx512_prolv_d_128
;
1402 else if (VecWidth
== 256 && EltWidth
== 32)
1403 IID
= Intrinsic::x86_avx512_prolv_d_256
;
1404 else if (VecWidth
== 512 && EltWidth
== 32)
1405 IID
= Intrinsic::x86_avx512_prolv_d_512
;
1406 else if (VecWidth
== 128 && EltWidth
== 64)
1407 IID
= Intrinsic::x86_avx512_prolv_q_128
;
1408 else if (VecWidth
== 256 && EltWidth
== 64)
1409 IID
= Intrinsic::x86_avx512_prolv_q_256
;
1410 else if (VecWidth
== 512 && EltWidth
== 64)
1411 IID
= Intrinsic::x86_avx512_prolv_q_512
;
1413 llvm_unreachable("Unexpected intrinsic");
1414 } else if (Name
.startswith("pror.")) {
1415 if (VecWidth
== 128 && EltWidth
== 32)
1416 IID
= Intrinsic::x86_avx512_pror_d_128
;
1417 else if (VecWidth
== 256 && EltWidth
== 32)
1418 IID
= Intrinsic::x86_avx512_pror_d_256
;
1419 else if (VecWidth
== 512 && EltWidth
== 32)
1420 IID
= Intrinsic::x86_avx512_pror_d_512
;
1421 else if (VecWidth
== 128 && EltWidth
== 64)
1422 IID
= Intrinsic::x86_avx512_pror_q_128
;
1423 else if (VecWidth
== 256 && EltWidth
== 64)
1424 IID
= Intrinsic::x86_avx512_pror_q_256
;
1425 else if (VecWidth
== 512 && EltWidth
== 64)
1426 IID
= Intrinsic::x86_avx512_pror_q_512
;
1428 llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("prol.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_prol_d_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_prol_d_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_prol_d_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_prol_q_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_prol_q_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_prol_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("padds.")) {
    if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_sse2_padds_b;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx2_padds_b;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_padds_b_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_sse2_padds_w;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx2_padds_w;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_padds_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("psubs.")) {
    if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_sse2_psubs_b;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx2_psubs_b;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_psubs_b_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_sse2_psubs_w;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx2_psubs_w;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_psubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else
    llvm_unreachable("Unexpected intrinsic");
  // Drop the trailing passthru and mask operands; the unmasked intrinsic
  // takes only the data operands, and the mask is applied with a select.
  SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
                               CI.arg_operands().end());
  Args.pop_back();
  Args.pop_back();
  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                           Args);

  unsigned NumArgs = CI.getNumArgOperands();
  Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
                      CI.getArgOperand(NumArgs - 2));
  return Rep;
}
/// Upgrade comment in call to inline asm that represents an objc retain
/// release marker.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  size_t Pos;
  if (AsmStr->find("mov\tfp") == 0 &&
      AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
      (Pos = AsmStr->find("# marker")) != std::string::npos) {
    AsmStr->replace(Pos, 1, ";");
  }
}
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  Function *F = CI->getCalledFunction();
  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());

  assert(F && "Intrinsic call is not direct?");

  if (!NewFn) {
    // Get the Function's name.
    StringRef Name = F->getName();

    assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
    Name = Name.substr(5);

    bool IsX86 = Name.startswith("x86.");
    if (IsX86)
      Name = Name.substr(4);
    bool IsNVVM = Name.startswith("nvvm.");
    if (IsNVVM)
      Name = Name.substr(5);
    if (IsX86 && Name.startswith("sse4a.movnt.")) {
      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Nontemporal (unaligned) store of the 0'th element of the float/double
      // vector.
      Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
      PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
      Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
      Value *Extract =
          Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");

      StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }
    if (IsX86 && (Name.startswith("avx.movnt.") ||
                  Name.startswith("avx512.storent."))) {
      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Arg1->getType()),
                                        "cast");
      VectorType *VTy = cast<VectorType>(Arg1->getType());
      StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
                                                 VTy->getBitWidth() / 8);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }
    if (IsX86 && Name == "sse2.storel.dq") {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
      Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
      Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Elt->getType()),
                                        "cast");
      Builder.CreateAlignedStore(Elt, BC, 1);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }
    if (IsX86 && (Name.startswith("sse.storeu.") ||
                  Name.startswith("sse2.storeu.") ||
                  Name.startswith("avx.storeu."))) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      Arg0 = Builder.CreateBitCast(Arg0,
                                   PointerType::getUnqual(Arg1->getType()),
                                   "cast");
      Builder.CreateAlignedStore(Arg1, Arg0, 1);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }
    if (IsX86 && Name == "avx512.mask.store.ss") {
      Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
      UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
                         Mask, false);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }
    if (IsX86 && (Name.startswith("avx512.mask.store"))) {
      // "avx512.mask.storeu." or "avx512.mask.store."
      bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
      UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
                         CI->getArgOperand(2), Aligned);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }
    Value *Rep;
    // Upgrade packed integer vector compare intrinsics to compare instructions.
    if (IsX86 && (Name.startswith("sse2.pcmp") ||
                  Name.startswith("avx2.pcmp"))) {
      // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
      bool CmpEq = Name[9] == 'e';
      Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
                               CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
      Type *ExtTy = Type::getInt32Ty(C);
      if (CI->getOperand(0)->getType()->isIntegerTy(8))
        ExtTy = Type::getInt64Ty(C);
      unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
                         ExtTy->getPrimitiveSizeInBits();
      Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
      Rep = Builder.CreateVectorSplat(NumElts, Rep);
    } else if (IsX86 && (Name == "sse.sqrt.ss" ||
                         Name == "sse2.sqrt.sd")) {
      Value *Vec = CI->getArgOperand(0);
      Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
      Function *Intr = Intrinsic::getDeclaration(F->getParent(),
                                                 Intrinsic::sqrt,
                                                 Elt0->getType());
      Elt0 = Builder.CreateCall(Intr, Elt0);
      Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
    } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
                         Name.startswith("sse2.sqrt.p") ||
                         Name.startswith("sse.sqrt.p"))) {
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                         Intrinsic::sqrt,
                                                         CI->getType()),
                               {CI->getArgOperand(0)});
    } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
      if (CI->getNumArgOperands() == 4 &&
          (!isa<ConstantInt>(CI->getArgOperand(3)) ||
           cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
        Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
                                            : Intrinsic::x86_avx512_sqrt_pd_512;

        Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
        Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                           IID), Args);
      } else {
        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                           Intrinsic::sqrt,
                                                           CI->getType()),
                                 {CI->getArgOperand(0)});
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
                         Name.startswith("avx512.ptestnm"))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      Value *Mask = CI->getArgOperand(2);
      Rep = Builder.CreateAnd(Op0, Op1);
      llvm::Type *Ty = Op0->getType();
      Value *Zero = llvm::Constant::getNullValue(Ty);
      ICmpInst::Predicate Pred =
        Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE
                                         : ICmpInst::ICMP_EQ;
      Rep = Builder.CreateICmp(Pred, Rep, Zero);
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
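      // Sketch of the resulting IR for ptestm (ptestnm uses icmp eq instead):
      //   %and = and <N x iM> %a, %b
      //   %cmp = icmp ne <N x iM> %and, zeroinitializer
      // after which the mask is applied and the i1 vector is converted back
      // to the scalar mask type.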
    } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))) {
      unsigned NumElts =
          CI->getArgOperand(1)->getType()->getVectorNumElements();
      Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
      unsigned NumElts = CI->getType()->getScalarSizeInBits();
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
      uint32_t Indices[64];
      for (unsigned i = 0; i != NumElts; ++i)
        Indices[i] = i;

      // First extract half of each vector. This gives better codegen than
      // doing it in a single shuffle.
      LHS = Builder.CreateShuffleVector(LHS, LHS,
                                        makeArrayRef(Indices, NumElts / 2));
      RHS = Builder.CreateShuffleVector(RHS, RHS,
                                        makeArrayRef(Indices, NumElts / 2));
      // Concat the vectors.
      // NOTE: Operands have to be swapped to match intrinsic definition.
      Rep = Builder.CreateShuffleVector(RHS, LHS,
                                        makeArrayRef(Indices, NumElts));
      Rep = Builder.CreateBitCast(Rep, CI->getType());
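      // For example, for avx512.kunpck.bw (NumElts == 16) the net effect is
      //   bits [7:0]  = low 8 bits of the second operand
      //   bits [15:8] = low 8 bits of the first operand
      // matching KUNPCKBW's (a << 8) | b semantics after the bitcast.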
    } else if (IsX86 && Name == "avx512.kand.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      Rep = Builder.CreateAnd(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kandn.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      LHS = Builder.CreateNot(LHS);
      Rep = Builder.CreateAnd(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kor.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      Rep = Builder.CreateOr(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kxor.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      Rep = Builder.CreateXor(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kxnor.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      LHS = Builder.CreateNot(LHS);
      Rep = Builder.CreateXor(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.knot.w") {
      Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Rep = Builder.CreateNot(Rep);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 &&
               (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      Rep = Builder.CreateOr(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
      Value *C;
      if (Name[14] == 'c')
        C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
      else
        C = ConstantInt::getNullValue(Builder.getInt16Ty());
      Rep = Builder.CreateICmpEQ(Rep, C);
      Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
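      // That is, kortestz.w yields 1 iff (a | b) == 0, and kortestc.w yields
      // 1 iff (a | b) == 0xffff, zero-extended to i32.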
    } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
                         Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
                         Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
                         Name == "sse.div.ss" || Name == "sse2.div.sd")) {
      Type *I32Ty = Type::getInt32Ty(C);
      Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
                                                 ConstantInt::get(I32Ty, 0));
      Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
                                                 ConstantInt::get(I32Ty, 0));
      Value *EltOp;
      if (Name.contains(".add."))
        EltOp = Builder.CreateFAdd(Elt0, Elt1);
      else if (Name.contains(".sub."))
        EltOp = Builder.CreateFSub(Elt0, Elt1);
      else if (Name.contains(".mul."))
        EltOp = Builder.CreateFMul(Elt0, Elt1);
      else
        EltOp = Builder.CreateFDiv(Elt0, Elt1);
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
                                        ConstantInt::get(I32Ty, 0));
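      // For example (illustrative IR), llvm.x86.sse.add.ss becomes:
      //   %a0 = extractelement <4 x float> %a, i32 0
      //   %b0 = extractelement <4 x float> %b, i32 0
      //   %s  = fadd float %a0, %b0
      //   %r  = insertelement <4 x float> %a, float %s, i32 0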
    } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
      // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
      bool CmpEq = Name[16] == 'e';
      Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
    } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
      Type *OpTy = CI->getArgOperand(0)->getType();
      unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
      unsigned EltWidth = OpTy->getScalarSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_fpclass_ps_128;
      else if (VecWidth == 256 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_fpclass_ps_256;
      else if (VecWidth == 512 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_fpclass_ps_512;
      else if (VecWidth == 128 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_fpclass_pd_128;
      else if (VecWidth == 256 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_fpclass_pd_256;
      else if (VecWidth == 512 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_fpclass_pd_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getOperand(0), CI->getArgOperand(1) });
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
      Type *OpTy = CI->getArgOperand(0)->getType();
      unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
      unsigned EltWidth = OpTy->getScalarSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_cmp_ps_128;
      else if (VecWidth == 256 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_cmp_ps_256;
      else if (VecWidth == 512 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_cmp_ps_512;
      else if (VecWidth == 128 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_cmp_pd_128;
      else if (VecWidth == 256 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_cmp_pd_256;
      else if (VecWidth == 512 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_cmp_pd_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      SmallVector<Value *, 4> Args;
      Args.push_back(CI->getArgOperand(0));
      Args.push_back(CI->getArgOperand(1));
      Args.push_back(CI->getArgOperand(2));
      if (CI->getNumArgOperands() == 5)
        Args.push_back(CI->getArgOperand(4));

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               Args);
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
    } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
               Name[16] != 'p') {
      // Integer compare intrinsics.
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
    } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
    } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
                         Name.startswith("avx512.cvtw2mask.") ||
                         Name.startswith("avx512.cvtd2mask.") ||
                         Name.startswith("avx512.cvtq2mask."))) {
      Value *Op = CI->getArgOperand(0);
      Value *Zero = llvm::Constant::getNullValue(Op->getType());
      Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
    } else if (IsX86 && (Name == "ssse3.pabs.b.128" ||
                         Name == "ssse3.pabs.w.128" ||
                         Name == "ssse3.pabs.d.128" ||
                         Name.startswith("avx2.pabs") ||
                         Name.startswith("avx512.mask.pabs"))) {
      Rep = upgradeAbs(Builder, *CI);
    } else if (IsX86 && (Name == "sse41.pmaxsb" ||
                         Name == "sse2.pmaxs.w" ||
                         Name == "sse41.pmaxsd" ||
                         Name.startswith("avx2.pmaxs") ||
                         Name.startswith("avx512.mask.pmaxs"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
    } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
                         Name == "sse41.pmaxuw" ||
                         Name == "sse41.pmaxud" ||
                         Name.startswith("avx2.pmaxu") ||
                         Name.startswith("avx512.mask.pmaxu"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
    } else if (IsX86 && (Name == "sse41.pminsb" ||
                         Name == "sse2.pmins.w" ||
                         Name == "sse41.pminsd" ||
                         Name.startswith("avx2.pmins") ||
                         Name.startswith("avx512.mask.pmins"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
    } else if (IsX86 && (Name == "sse2.pminu.b" ||
                         Name == "sse41.pminuw" ||
                         Name == "sse41.pminud" ||
                         Name.startswith("avx2.pminu") ||
                         Name.startswith("avx512.mask.pminu"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
    } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
                         Name == "avx2.pmulu.dq" ||
                         Name == "avx512.pmulu.dq.512" ||
                         Name.startswith("avx512.mask.pmulu.dq."))) {
      Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
    } else if (IsX86 && (Name == "sse41.pmuldq" ||
                         Name == "avx2.pmul.dq" ||
                         Name == "avx512.pmul.dq.512" ||
                         Name.startswith("avx512.mask.pmul.dq."))) {
      Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
    } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
                         Name == "sse2.cvtsi2sd" ||
                         Name == "sse.cvtsi642ss" ||
                         Name == "sse2.cvtsi642sd")) {
      Rep = Builder.CreateSIToFP(CI->getArgOperand(1),
                                 CI->getType()->getVectorElementType());
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
    } else if (IsX86 && Name == "avx512.cvtusi2sd") {
      Rep = Builder.CreateUIToFP(CI->getArgOperand(1),
                                 CI->getType()->getVectorElementType());
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
    } else if (IsX86 && Name == "sse2.cvtss2sd") {
      Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
      Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType());
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
    } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
                         Name == "sse2.cvtdq2ps" ||
                         Name == "avx.cvtdq2.pd.256" ||
                         Name == "avx.cvtdq2.ps.256" ||
                         Name.startswith("avx512.mask.cvtdq2pd.") ||
                         Name.startswith("avx512.mask.cvtudq2pd.") ||
                         Name == "avx512.mask.cvtdq2ps.128" ||
                         Name == "avx512.mask.cvtdq2ps.256" ||
                         Name == "avx512.mask.cvtudq2ps.128" ||
                         Name == "avx512.mask.cvtudq2ps.256" ||
                         Name == "avx512.mask.cvtqq2pd.128" ||
                         Name == "avx512.mask.cvtqq2pd.256" ||
                         Name == "avx512.mask.cvtuqq2pd.128" ||
                         Name == "avx512.mask.cvtuqq2pd.256" ||
                         Name == "sse2.cvtps2pd" ||
                         Name == "avx.cvt.ps2.pd.256" ||
                         Name == "avx512.mask.cvtps2pd.128" ||
                         Name == "avx512.mask.cvtps2pd.256")) {
      Type *DstTy = CI->getType();
      Rep = CI->getArgOperand(0);

      unsigned NumDstElts = DstTy->getVectorNumElements();
      if (NumDstElts < Rep->getType()->getVectorNumElements()) {
        assert(NumDstElts == 2 && "Unexpected vector size");
        uint32_t ShuffleMask[2] = { 0, 1 };
        Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
      }

      bool IsPS2PD = (StringRef::npos != Name.find("ps2"));
      bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
      if (IsPS2PD)
        Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
      else if (IsUnsigned)
        Rep = Builder.CreateUIToFP(Rep, DstTy, "cvt");
      else
        Rep = Builder.CreateSIToFP(Rep, DstTy, "cvt");

      if (CI->getNumArgOperands() == 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
      Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
                              CI->getArgOperand(1), CI->getArgOperand(2),
                              /*Aligned*/false);
    } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
      Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
                              CI->getArgOperand(1), CI->getArgOperand(2),
                              /*Aligned*/true);
    } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
      Type *ResultTy = CI->getType();
      Type *PtrTy = ResultTy->getVectorElementType();

      // Cast the pointer to element type.
      Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
                                         llvm::PointerType::getUnqual(PtrTy));

      Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                     ResultTy->getVectorNumElements());

      Function *ELd = Intrinsic::getDeclaration(F->getParent(),
                                                Intrinsic::masked_expandload,
                                                ResultTy);
      Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
    } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
      Type *ResultTy = CI->getArgOperand(1)->getType();
      Type *PtrTy = ResultTy->getVectorElementType();

      // Cast the pointer to element type.
      Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
                                         llvm::PointerType::getUnqual(PtrTy));

      Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                     ResultTy->getVectorNumElements());

      Function *CSt = Intrinsic::getDeclaration(F->getParent(),
                                                Intrinsic::masked_compressstore,
                                                ResultTy);
      Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
    } else if (IsX86 && Name.startswith("xop.vpcom")) {
      Intrinsic::ID intID;
      if (Name.endswith("ub"))
        intID = Intrinsic::x86_xop_vpcomub;
      else if (Name.endswith("uw"))
        intID = Intrinsic::x86_xop_vpcomuw;
      else if (Name.endswith("ud"))
        intID = Intrinsic::x86_xop_vpcomud;
      else if (Name.endswith("uq"))
        intID = Intrinsic::x86_xop_vpcomuq;
      else if (Name.endswith("b"))
        intID = Intrinsic::x86_xop_vpcomb;
      else if (Name.endswith("w"))
        intID = Intrinsic::x86_xop_vpcomw;
      else if (Name.endswith("d"))
        intID = Intrinsic::x86_xop_vpcomd;
      else if (Name.endswith("q"))
        intID = Intrinsic::x86_xop_vpcomq;
      else
        llvm_unreachable("Unknown suffix");

      Name = Name.substr(9); // strip off "xop.vpcom"
      unsigned Imm;
      if (Name.startswith("lt"))
        Imm = 0;
      else if (Name.startswith("le"))
        Imm = 1;
      else if (Name.startswith("gt"))
        Imm = 2;
      else if (Name.startswith("ge"))
        Imm = 3;
      else if (Name.startswith("eq"))
        Imm = 4;
      else if (Name.startswith("ne"))
        Imm = 5;
      else if (Name.startswith("false"))
        Imm = 6;
      else if (Name.startswith("true"))
        Imm = 7;
      else
        llvm_unreachable("Unknown condition");

      Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
      Rep =
          Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
                                     Builder.getInt8(Imm)});
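      // The condition ends up encoded in the immediate operand: lt=0, le=1,
      // gt=2, ge=3, eq=4, ne=5, false=6, true=7. E.g. xop.vpcomltb becomes a
      // call to the vpcomb intrinsic with an i8 0 immediate.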
    } else if (IsX86 && Name.startswith("xop.vpcmov")) {
      Value *Sel = CI->getArgOperand(2);
      Value *NotSel = Builder.CreateNot(Sel);
      Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
      Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
      Rep = Builder.CreateOr(Sel0, Sel1);
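      // This is the bitwise select (A & Sel) | (B & ~Sel): a select bit of 1
      // takes the bit from the first operand, 0 takes it from the second.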
    } else if (IsX86 && Name == "sse42.crc32.64.8") {
      Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
                                                  Intrinsic::x86_sse42_crc32_32_8);
      Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
      Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
      Rep = Builder.CreateZExt(Rep, CI->getType(), "");
    } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
                         Name.startswith("avx512.vbroadcast.s"))) {
      // Replace broadcasts with a series of insertelements.
      Type *VecTy = CI->getType();
      Type *EltTy = VecTy->getVectorElementType();
      unsigned EltNum = VecTy->getVectorNumElements();
      Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
                                          EltTy->getPointerTo());
      Value *Load = Builder.CreateLoad(EltTy, Cast);
      Type *I32Ty = Type::getInt32Ty(C);
      Rep = UndefValue::get(VecTy);
      for (unsigned I = 0; I < EltNum; ++I)
        Rep = Builder.CreateInsertElement(Rep, Load,
                                          ConstantInt::get(I32Ty, I));
    } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
                         Name.startswith("sse41.pmovzx") ||
                         Name.startswith("avx2.pmovsx") ||
                         Name.startswith("avx2.pmovzx") ||
                         Name.startswith("avx512.mask.pmovsx") ||
                         Name.startswith("avx512.mask.pmovzx"))) {
      VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
      VectorType *DstTy = cast<VectorType>(CI->getType());
      unsigned NumDstElts = DstTy->getNumElements();

      // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
      SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
      for (unsigned i = 0; i != NumDstElts; ++i)
        ShuffleMask[i] = i;

      Value *SV = Builder.CreateShuffleVector(
          CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);

      bool DoSext = (StringRef::npos != Name.find("pmovsx"));
      Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
                   : Builder.CreateZExt(SV, DstTy);
      // If there are 3 arguments, it's a masked intrinsic so we need a select.
      if (CI->getNumArgOperands() == 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
                         Name == "avx2.vbroadcasti128")) {
      // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
      Type *EltTy = CI->getType()->getVectorElementType();
      unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
      Type *VT = VectorType::get(EltTy, NumSrcElts);
      Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                            PointerType::getUnqual(VT));
      Value *Load = Builder.CreateAlignedLoad(Op, 1);
      if (NumSrcElts == 2)
        Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
                                          { 0, 1, 0, 1 });
      else
        Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
                                          { 0, 1, 2, 3, 0, 1, 2, 3 });
    } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
                         Name.startswith("avx512.mask.shuf.f"))) {
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Type *VT = CI->getType();
      unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
      unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
      unsigned ControlBitsMask = NumLanes - 1;
      unsigned NumControlBits = NumLanes / 2;
      SmallVector<uint32_t, 8> ShuffleMask(0);

      for (unsigned l = 0; l != NumLanes; ++l) {
        unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
        // We actually need the other source.
        if (l >= NumLanes / 2)
          LaneMask += NumLanes;
        for (unsigned i = 0; i != NumElementsInLane; ++i)
          ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
      }
      Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                        CI->getArgOperand(1), ShuffleMask);
      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
    } else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
                         Name.startswith("avx512.mask.broadcasti"))) {
      unsigned NumSrcElts =
          CI->getArgOperand(0)->getType()->getVectorNumElements();
      unsigned NumDstElts = CI->getType()->getVectorNumElements();

      SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
      for (unsigned i = 0; i != NumDstElts; ++i)
        ShuffleMask[i] = i % NumSrcElts;

      Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                        CI->getArgOperand(0),
                                        ShuffleMask);
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
                         Name.startswith("avx2.vbroadcast") ||
                         Name.startswith("avx512.pbroadcast") ||
                         Name.startswith("avx512.mask.broadcast.s"))) {
      // Replace vp?broadcasts with a vector shuffle.
      Value *Op = CI->getArgOperand(0);
      unsigned NumElts = CI->getType()->getVectorNumElements();
      Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
      Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
                                        Constant::getNullValue(MaskTy));

      if (CI->getNumArgOperands() == 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
                         Name.startswith("sse2.psubus.") ||
                         Name.startswith("avx2.paddus.") ||
                         Name.startswith("avx2.psubus.") ||
                         Name.startswith("avx512.mask.paddus.") ||
                         Name.startswith("avx512.mask.psubus."))) {
      bool IsAdd = Name.contains(".paddus");
      Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, IsAdd);
    } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
                                      CI->getArgOperand(1),
                                      CI->getArgOperand(2),
                                      CI->getArgOperand(3),
                                      CI->getArgOperand(4),
                                      false);
    } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
                                      CI->getArgOperand(1),
                                      CI->getArgOperand(2),
                                      CI->getArgOperand(3),
                                      CI->getArgOperand(4),
                                      true);
    } else if (IsX86 && (Name == "sse2.psll.dq" ||
                         Name == "avx2.psll.dq")) {
      // 128/256-bit shift left specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (IsX86 && (Name == "sse2.psrl.dq" ||
                         Name == "avx2.psrl.dq")) {
      // 128/256-bit shift right specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
                         Name == "avx2.psll.dq.bs" ||
                         Name == "avx512.psll.dq.512")) {
      // 128/256/512-bit shift left specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
    } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
                         Name == "avx2.psrl.dq.bs" ||
                         Name == "avx512.psrl.dq.512")) {
      // 128/256/512-bit shift right specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
    } else if (IsX86 && (Name == "sse41.pblendw" ||
                         Name.startswith("sse41.blendp") ||
                         Name.startswith("avx.blend.p") ||
                         Name == "avx2.pblendw" ||
                         Name.startswith("avx2.pblendd."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
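      // Worked example: pblendw with Imm = 0x0F on <8 x i16> produces
      //   Idxs = <8, 9, 10, 11, 4, 5, 6, 7>
      // taking the low four elements from Op1 and the rest from Op0.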
    } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
                         Name == "avx2.vinserti128" ||
                         Name.startswith("avx512.mask.insert"))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      unsigned DstNumElts = CI->getType()->getVectorNumElements();
      unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
      unsigned Scale = DstNumElts / SrcNumElts;

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm % Scale;

      // Extend the second operand into a vector the size of the destination.
      Value *UndefV = UndefValue::get(Op1->getType());
      SmallVector<uint32_t, 8> Idxs(DstNumElts);
      for (unsigned i = 0; i != SrcNumElts; ++i)
        Idxs[i] = i;
      for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
        Idxs[i] = SrcNumElts;
      Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);

      // Insert the second operand into the first operand.

      // Note that there is no guarantee that instruction lowering will actually
      // produce a vinsertf128 instruction for the created shuffles. In
      // particular, the 0 immediate case involves no lane changes, so it can
      // be handled as a blend.

      // Example of shuffle mask for 32-bit elements:
      // Imm = 1  <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
      // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >

      // First fill with identity mask.
      for (unsigned i = 0; i != DstNumElts; ++i)
        Idxs[i] = i;
      // Then replace the elements where we need to insert.
      for (unsigned i = 0; i != SrcNumElts; ++i)
        Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
      Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);

      // If the intrinsic has a mask operand, handle that.
      if (CI->getNumArgOperands() == 5)
        Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                            CI->getArgOperand(3));
    } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
                         Name == "avx2.vextracti128" ||
                         Name.startswith("avx512.mask.vextract"))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned DstNumElts = CI->getType()->getVectorNumElements();
      unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
      unsigned Scale = SrcNumElts / DstNumElts;

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm % Scale;

      // Get indexes for the subvector of the input vector.
      SmallVector<uint32_t, 8> Idxs(DstNumElts);
      for (unsigned i = 0; i != DstNumElts; ++i) {
        Idxs[i] = i + (Imm * DstNumElts);
      }
      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      // If the intrinsic has a mask operand, handle that.
      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
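      // E.g. extracting the upper half (Imm = 1) of an <8 x float> source
      // into a <4 x float> result uses Idxs = <4, 5, 6, 7>.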
    } else if (!IsX86 && Name == "stackprotectorcheck") {
      Rep = nullptr;
    } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
                         Name.startswith("avx512.mask.perm.di."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<uint32_t, 8> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
                         Name == "avx2.vperm2i128")) {
      // The immediate permute control byte looks like this:
      //    [1:0] - select 128 bits from sources for low half of destination
      //    [2]   - ignore
      //    [3]   - zero low half of destination
      //    [5:4] - select 128 bits from sources for high half of destination
      //    [6]   - ignore
      //    [7]   - zero high half of destination

      uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();

      unsigned NumElts = CI->getType()->getVectorNumElements();
      unsigned HalfSize = NumElts / 2;
      SmallVector<uint32_t, 8> ShuffleMask(NumElts);

      // Determine which operand(s) are actually in use for this instruction.
      Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
      Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);

      // If needed, replace operands based on zero mask.
      V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
      V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;

      // Permute low half of result.
      unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
      for (unsigned i = 0; i < HalfSize; ++i)
        ShuffleMask[i] = StartIndex + i;

      // Permute high half of result.
      StartIndex = (Imm & 0x10) ? HalfSize : 0;
      for (unsigned i = 0; i < HalfSize; ++i)
        ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;

      Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
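      // Worked example: Imm = 0x21 on <8 x float> sources selects the high
      // half of the first source and the low half of the second:
      //   ShuffleMask = <4, 5, 6, 7, 8, 9, 10, 11>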
    } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
                         Name == "sse2.pshuf.d" ||
                         Name.startswith("avx512.mask.vpermil.p") ||
                         Name.startswith("avx512.mask.pshuf.d."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();
      // Calculate the size of each index in the immediate.
      unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
      unsigned IdxMask = ((1 << IdxSize) - 1);

      SmallVector<uint32_t, 8> Idxs(NumElts);
      // Lookup the bits for this element, wrapping around the immediate every
      // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
      // to offset by the first index of each group.
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
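      // E.g. sse2.pshuf.d with Imm = 0x1B (0b00011011) reverses a <4 x i32>:
      //   Idxs = <3, 2, 1, 0>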
    } else if (IsX86 && (Name == "sse2.pshufl.w" ||
                         Name.startswith("avx512.mask.pshufl.w."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
        for (unsigned i = 4; i != 8; ++i)
          Idxs[i + l] = i + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && (Name == "sse2.pshufh.w" ||
                         Name.startswith("avx512.mask.pshufh.w."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = i + l;
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
      unsigned HalfLaneElts = NumLaneElts / 2;

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        // Base index is the starting element of the lane.
        Idxs[i] = i - (i % NumLaneElts);
        // If we are half way through the lane switch to the other source.
        if ((i % NumLaneElts) >= HalfLaneElts)
          Idxs[i] += NumElts;
        // Now select the specific element by adding HalfLaneElts bits from
        // the immediate, wrapping around the immediate every 8 bits.
        Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
      }

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
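      // Worked example: shufps with Imm = 0xE4 on <4 x float> operands yields
      //   Idxs = <0, 1, 6, 7>
      // (low two elements from Op0, high two from Op1, each in source order).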
    } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
                         Name.startswith("avx512.mask.movshdup") ||
                         Name.startswith("avx512.mask.movsldup"))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned NumElts = CI->getType()->getVectorNumElements();
      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      unsigned Offset = 0;
      if (Name.startswith("avx512.mask.movshdup."))
        Offset = 1;

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += NumLaneElts)
        for (unsigned i = 0; i != NumLaneElts; i += 2) {
          Idxs[i + l + 0] = i + l + Offset;
          Idxs[i + l + 1] = i + l + Offset;
        }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
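      // E.g. on <4 x float>: movsldup duplicates the even elements
      // (Idxs = <0, 0, 2, 2>) and movshdup the odd ones (Idxs = <1, 1, 3, 3>).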
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
                         Name.startswith("avx512.mask.unpckl."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = CI->getType()->getVectorNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<uint32_t, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
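      // For example, punpcklbw on <16 x i8> operands interleaves the low
      // halves of the two sources:
      //   Idxs = <0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23>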
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
                         Name.startswith("avx512.mask.unpckh."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = CI->getType()->getVectorNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<uint32_t, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
      Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
      Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
                              CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
      Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
      Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
      Rep = Builder.CreateAnd(Rep,
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
      Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
      Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
      Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_add_ps_512;
        else
          IID = Intrinsic::x86_avx512_add_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_div_ps_512;
        else
          IID = Intrinsic::x86_avx512_div_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_mul_ps_512;
        else
          IID = Intrinsic::x86_avx512_mul_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_sub_ps_512;
        else
          IID = Intrinsic::x86_avx512_sub_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.max.p") &&
               Name.drop_front(18) == ".512") {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_max_ps_512;
      else
        IID = Intrinsic::x86_avx512_max_pd_512;

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1),
                                 CI->getArgOperand(4) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.min.p") &&
               Name.drop_front(18) == ".512") {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_min_ps_512;
      else
        IID = Intrinsic::x86_avx512_min_pd_512;

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1),
                                 CI->getArgOperand(4) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                         Intrinsic::ctlz,
                                                         CI->getType()),
                               { CI->getArgOperand(0), Builder.getInt1(false) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    0;

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
          IID = Intrinsic::x86_avx2_psllv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
          IID = Intrinsic::x86_avx2_psllv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
          IID = Intrinsic::x86_avx2_psllv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
          IID = Intrinsic::x86_avx2_psllv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
          IID = Intrinsic::x86_avx512_psllv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
          IID = Intrinsic::x86_avx512_psllv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
          IID = Intrinsic::x86_avx512_psllv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
                            : Intrinsic::x86_sse2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
                            : Intrinsic::x86_sse2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
                            : Intrinsic::x86_sse2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
                            : Intrinsic::x86_avx2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
                            : Intrinsic::x86_avx2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
                            : Intrinsic::x86_avx2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
                              Intrinsic::x86_avx512_psll_d_512;
        else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
                              Intrinsic::x86_avx512_psll_q_512;
        else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
                            : Intrinsic::x86_avx512_psll_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
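      // Examples of the name decoding above:
      //   "avx512.mask.psll.d.128"  -> x86_sse2_psll_d        (vector count)
      //   "avx512.mask.pslli.d.256" -> x86_avx2_pslli_d       (immediate count)
      //   "avx512.mask.psllv.d.512" -> x86_avx512_psllv_d_512 (per-element count)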
    } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    0;

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
          IID = Intrinsic::x86_avx2_psrlv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
          IID = Intrinsic::x86_avx2_psrlv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
          IID = Intrinsic::x86_avx2_psrlv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
          IID = Intrinsic::x86_avx2_psrlv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
          IID = Intrinsic::x86_avx512_psrlv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
          IID = Intrinsic::x86_avx512_psrlv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
          IID = Intrinsic::x86_avx512_psrlv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
                            : Intrinsic::x86_sse2_psrl_d;
        else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
                            : Intrinsic::x86_sse2_psrl_q;
        else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
                            : Intrinsic::x86_sse2_psrl_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
                            : Intrinsic::x86_avx2_psrl_d;
        else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
                            : Intrinsic::x86_avx2_psrl_q;
        else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
                            : Intrinsic::x86_avx2_psrl_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
                              Intrinsic::x86_avx512_psrl_d_512;
        else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
                              Intrinsic::x86_avx512_psrl_q_512;
        else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
                            : Intrinsic::x86_avx512_psrl_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2770 } else if (IsX86
&& Name
.startswith("avx512.mask.psra")) {
2771 bool IsImmediate
= Name
[16] == 'i' ||
2772 (Name
.size() > 18 && Name
[18] == 'i');
2773 bool IsVariable
= Name
[16] == 'v';
2774 char Size
= Name
[16] == '.' ? Name
[17] :
2775 Name
[17] == '.' ? Name
[18] :
2776 Name
[18] == '.' ? Name
[19] :
2780 if (IsVariable
&& Name
[17] != '.') {
2781 if (Size
== 's' && Name
[17] == '4') // avx512.mask.psrav4.si
2782 IID
= Intrinsic::x86_avx2_psrav_d
;
2783 else if (Size
== 's' && Name
[17] == '8') // avx512.mask.psrav8.si
2784 IID
= Intrinsic::x86_avx2_psrav_d_256
;
2785 else if (Size
== 'h' && Name
[17] == '8') // avx512.mask.psrav8.hi
2786 IID
= Intrinsic::x86_avx512_psrav_w_128
;
2787 else if (Size
== 'h' && Name
[17] == '1') // avx512.mask.psrav16.hi
2788 IID
= Intrinsic::x86_avx512_psrav_w_256
;
2789 else if (Name
[17] == '3' && Name
[18] == '2') // avx512.mask.psrav32hi
2790 IID
= Intrinsic::x86_avx512_psrav_w_512
;
2792 llvm_unreachable("Unexpected size");
2793 } else if (Name
.endswith(".128")) {
2794 if (Size
== 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
2795 IID
= IsImmediate
? Intrinsic::x86_sse2_psrai_d
2796 : Intrinsic::x86_sse2_psra_d
;
2797 else if (Size
== 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
2798 IID
= IsImmediate
? Intrinsic::x86_avx512_psrai_q_128
:
2799 IsVariable
? Intrinsic::x86_avx512_psrav_q_128
:
2800 Intrinsic::x86_avx512_psra_q_128
;
2801 else if (Size
== 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
2802 IID
= IsImmediate
? Intrinsic::x86_sse2_psrai_w
2803 : Intrinsic::x86_sse2_psra_w
;
2805 llvm_unreachable("Unexpected size");
2806 } else if (Name
.endswith(".256")) {
2807 if (Size
== 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
2808 IID
= IsImmediate
? Intrinsic::x86_avx2_psrai_d
2809 : Intrinsic::x86_avx2_psra_d
;
2810 else if (Size
== 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
2811 IID
= IsImmediate
? Intrinsic::x86_avx512_psrai_q_256
:
2812 IsVariable
? Intrinsic::x86_avx512_psrav_q_256
:
2813 Intrinsic::x86_avx512_psra_q_256
;
2814 else if (Size
== 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
2815 IID
= IsImmediate
? Intrinsic::x86_avx2_psrai_w
2816 : Intrinsic::x86_avx2_psra_w
;
2818 llvm_unreachable("Unexpected size");
2820 if (Size
== 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
2821 IID
= IsImmediate
? Intrinsic::x86_avx512_psrai_d_512
:
2822 IsVariable
? Intrinsic::x86_avx512_psrav_d_512
:
2823 Intrinsic::x86_avx512_psra_d_512
;
2824 else if (Size
== 'q') // psra.qi.512, psrai.q, psra.q
2825 IID
= IsImmediate
? Intrinsic::x86_avx512_psrai_q_512
:
2826 IsVariable
? Intrinsic::x86_avx512_psrav_q_512
:
2827 Intrinsic::x86_avx512_psra_q_512
;
2828 else if (Size
== 'w') // psra.wi.512, psrai.w, psra.w
2829 IID
= IsImmediate
? Intrinsic::x86_avx512_psrai_w_512
2830 : Intrinsic::x86_avx512_psra_w_512
;
2832 llvm_unreachable("Unexpected size");
2835 Rep
= UpgradeX86MaskedShift(Builder
, *CI
, IID
);
2836 } else if (IsX86
&& Name
.startswith("avx512.mask.move.s")) {
2837 Rep
= upgradeMaskedMove(Builder
, *CI
);
2838 } else if (IsX86
&& Name
.startswith("avx512.cvtmask2")) {
2839 Rep
= UpgradeMaskToInt(Builder
, *CI
);
2840 } else if (IsX86
&& Name
.endswith(".movntdqa")) {
2841 Module
*M
= F
->getParent();
2842 MDNode
*Node
= MDNode::get(
2843 C
, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C
), 1)));
2845 Value
*Ptr
= CI
->getArgOperand(0);
2846 VectorType
*VTy
= cast
<VectorType
>(CI
->getType());
2848 // Convert the type of the pointer to a pointer to the stored type.
2850 Builder
.CreateBitCast(Ptr
, PointerType::getUnqual(VTy
), "cast");
2851 LoadInst
*LI
= Builder
.CreateAlignedLoad(BC
, VTy
->getBitWidth() / 8);
2852 LI
->setMetadata(M
->getMDKindID("nontemporal"), Node
);
2855 (Name
.startswith("sse2.pavg") || Name
.startswith("avx2.pavg") ||
2856 Name
.startswith("avx512.mask.pavg"))) {
2857 // llvm.x86.sse2.pavg.b/w, llvm.x86.avx2.pavg.b/w,
2858 // llvm.x86.avx512.mask.pavg.b/w
2859 Value
*A
= CI
->getArgOperand(0);
2860 Value
*B
= CI
->getArgOperand(1);
2861 VectorType
*ZextType
= VectorType::getExtendedElementVectorType(
2862 cast
<VectorType
>(A
->getType()));
2863 Value
*ExtendedA
= Builder
.CreateZExt(A
, ZextType
);
2864 Value
*ExtendedB
= Builder
.CreateZExt(B
, ZextType
);
2865 Value
*Sum
= Builder
.CreateAdd(ExtendedA
, ExtendedB
);
2866 Value
*AddOne
= Builder
.CreateAdd(Sum
, ConstantInt::get(ZextType
, 1));
2867 Value
*ShiftR
= Builder
.CreateLShr(AddOne
, ConstantInt::get(ZextType
, 1));
2868 Rep
= Builder
.CreateTrunc(ShiftR
, A
->getType());
2869 if (CI
->getNumArgOperands() > 2) {
2870 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
,
2871 CI
->getArgOperand(2));
2873 } else if (IsX86
&& (Name
.startswith("fma.vfmadd.") ||
2874 Name
.startswith("fma.vfmsub.") ||
2875 Name
.startswith("fma.vfnmadd.") ||
2876 Name
.startswith("fma.vfnmsub."))) {
2877 bool NegMul
= Name
[6] == 'n';
2878 bool NegAcc
= NegMul
? Name
[8] == 's' : Name
[7] == 's';
2879 bool IsScalar
= NegMul
? Name
[12] == 's' : Name
[11] == 's';
2881 Value
*Ops
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
2882 CI
->getArgOperand(2) };
2885 Ops
[0] = Builder
.CreateExtractElement(Ops
[0], (uint64_t)0);
2886 Ops
[1] = Builder
.CreateExtractElement(Ops
[1], (uint64_t)0);
2887 Ops
[2] = Builder
.CreateExtractElement(Ops
[2], (uint64_t)0);
2890 if (NegMul
&& !IsScalar
)
2891 Ops
[0] = Builder
.CreateFNeg(Ops
[0]);
2892 if (NegMul
&& IsScalar
)
2893 Ops
[1] = Builder
.CreateFNeg(Ops
[1]);
2895 Ops
[2] = Builder
.CreateFNeg(Ops
[2]);
2897 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(),
2903 Rep
= Builder
.CreateInsertElement(CI
->getArgOperand(0), Rep
,
2905 } else if (IsX86
&& Name
.startswith("fma4.vfmadd.s")) {
2906 Value
*Ops
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
2907 CI
->getArgOperand(2) };
2909 Ops
[0] = Builder
.CreateExtractElement(Ops
[0], (uint64_t)0);
2910 Ops
[1] = Builder
.CreateExtractElement(Ops
[1], (uint64_t)0);
2911 Ops
[2] = Builder
.CreateExtractElement(Ops
[2], (uint64_t)0);
2913 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(),
2918 Rep
= Builder
.CreateInsertElement(Constant::getNullValue(CI
->getType()),
2920 } else if (IsX86
&& (Name
.startswith("avx512.mask.vfmadd.s") ||
2921 Name
.startswith("avx512.maskz.vfmadd.s") ||
2922 Name
.startswith("avx512.mask3.vfmadd.s") ||
2923 Name
.startswith("avx512.mask3.vfmsub.s") ||
2924 Name
.startswith("avx512.mask3.vfnmsub.s"))) {
2925 bool IsMask3
= Name
[11] == '3';
2926 bool IsMaskZ
= Name
[11] == 'z';
2927 // Drop the "avx512.mask." to make it easier.
2928 Name
= Name
.drop_front(IsMask3
|| IsMaskZ
? 13 : 12);
2929 bool NegMul
= Name
[2] == 'n';
2930 bool NegAcc
= NegMul
? Name
[4] == 's' : Name
[3] == 's';
2932 Value
*A
= CI
->getArgOperand(0);
2933 Value
*B
= CI
->getArgOperand(1);
2934 Value
*C
= CI
->getArgOperand(2);
2936 if (NegMul
&& (IsMask3
|| IsMaskZ
))
2937 A
= Builder
.CreateFNeg(A
);
2938 if (NegMul
&& !(IsMask3
|| IsMaskZ
))
2939 B
= Builder
.CreateFNeg(B
);
2941 C
= Builder
.CreateFNeg(C
);
2943 A
= Builder
.CreateExtractElement(A
, (uint64_t)0);
2944 B
= Builder
.CreateExtractElement(B
, (uint64_t)0);
2945 C
= Builder
.CreateExtractElement(C
, (uint64_t)0);
2947 if (!isa
<ConstantInt
>(CI
->getArgOperand(4)) ||
2948 cast
<ConstantInt
>(CI
->getArgOperand(4))->getZExtValue() != 4) {
2949 Value
*Ops
[] = { A
, B
, C
, CI
->getArgOperand(4) };
2952 if (Name
.back() == 'd')
2953 IID
= Intrinsic::x86_avx512_vfmadd_f64
;
2955 IID
= Intrinsic::x86_avx512_vfmadd_f32
;
2956 Function
*FMA
= Intrinsic::getDeclaration(CI
->getModule(), IID
);
2957 Rep
= Builder
.CreateCall(FMA
, Ops
);
2959 Function
*FMA
= Intrinsic::getDeclaration(CI
->getModule(),
2962 Rep
= Builder
.CreateCall(FMA
, { A
, B
, C
});
2965 Value
*PassThru
= IsMaskZ
? Constant::getNullValue(Rep
->getType()) :
2968 // For Mask3 with NegAcc, we need to create a new extractelement that
2969 // avoids the negation above.
2970 if (NegAcc
&& IsMask3
)
2971 PassThru
= Builder
.CreateExtractElement(CI
->getArgOperand(2),
2974 Rep
= EmitX86ScalarSelect(Builder
, CI
->getArgOperand(3),
2976 Rep
= Builder
.CreateInsertElement(CI
->getArgOperand(IsMask3
? 2 : 0),
2978 } else if (IsX86
&& (Name
.startswith("avx512.mask.vfmadd.p") ||
2979 Name
.startswith("avx512.mask.vfnmadd.p") ||
2980 Name
.startswith("avx512.mask.vfnmsub.p") ||
2981 Name
.startswith("avx512.mask3.vfmadd.p") ||
2982 Name
.startswith("avx512.mask3.vfmsub.p") ||
2983 Name
.startswith("avx512.mask3.vfnmsub.p") ||
2984 Name
.startswith("avx512.maskz.vfmadd.p"))) {
2985 bool IsMask3
= Name
[11] == '3';
2986 bool IsMaskZ
= Name
[11] == 'z';
2987 // Drop the "avx512.mask." to make it easier.
2988 Name
= Name
.drop_front(IsMask3
|| IsMaskZ
? 13 : 12);
2989 bool NegMul
= Name
[2] == 'n';
2990 bool NegAcc
= NegMul
? Name
[4] == 's' : Name
[3] == 's';
2992 Value
*A
= CI
->getArgOperand(0);
2993 Value
*B
= CI
->getArgOperand(1);
2994 Value
*C
= CI
->getArgOperand(2);
2996 if (NegMul
&& (IsMask3
|| IsMaskZ
))
2997 A
= Builder
.CreateFNeg(A
);
2998 if (NegMul
&& !(IsMask3
|| IsMaskZ
))
2999 B
= Builder
.CreateFNeg(B
);
3001 C
= Builder
.CreateFNeg(C
);
3003 if (CI
->getNumArgOperands() == 5 &&
3004 (!isa
<ConstantInt
>(CI
->getArgOperand(4)) ||
3005 cast
<ConstantInt
>(CI
->getArgOperand(4))->getZExtValue() != 4)) {
3007 // Check the character before ".512" in string.
3008 if (Name
[Name
.size()-5] == 's')
3009 IID
= Intrinsic::x86_avx512_vfmadd_ps_512
;
3011 IID
= Intrinsic::x86_avx512_vfmadd_pd_512
;
3013 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(), IID
),
3014 { A
, B
, C
, CI
->getArgOperand(4) });
3016 Function
*FMA
= Intrinsic::getDeclaration(CI
->getModule(),
3019 Rep
= Builder
.CreateCall(FMA
, { A
, B
, C
});
3022 Value
*PassThru
= IsMaskZ
? llvm::Constant::getNullValue(CI
->getType()) :
3023 IsMask3
? CI
->getArgOperand(2) :
3024 CI
->getArgOperand(0);
3026 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
, PassThru
);
3027 } else if (IsX86
&& (Name
.startswith("fma.vfmaddsub.p") ||
3028 Name
.startswith("fma.vfmsubadd.p"))) {
3029 bool IsSubAdd
= Name
[7] == 's';
3030 int NumElts
= CI
->getType()->getVectorNumElements();
3032 Value
*Ops
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3033 CI
->getArgOperand(2) };
3035 Function
*FMA
= Intrinsic::getDeclaration(CI
->getModule(), Intrinsic::fma
,
3037 Value
*Odd
= Builder
.CreateCall(FMA
, Ops
);
3038 Ops
[2] = Builder
.CreateFNeg(Ops
[2]);
3039 Value
*Even
= Builder
.CreateCall(FMA
, Ops
);
3042 std::swap(Even
, Odd
);
3044 SmallVector
<uint32_t, 32> Idxs(NumElts
);
3045 for (int i
= 0; i
!= NumElts
; ++i
)
3046 Idxs
[i
] = i
+ (i
% 2) * NumElts
;
3048 Rep
= Builder
.CreateShuffleVector(Even
, Odd
, Idxs
);
3049 } else if (IsX86
&& (Name
.startswith("avx512.mask.vfmaddsub.p") ||
3050 Name
.startswith("avx512.mask3.vfmaddsub.p") ||
3051 Name
.startswith("avx512.maskz.vfmaddsub.p") ||
3052 Name
.startswith("avx512.mask3.vfmsubadd.p"))) {
3053 bool IsMask3
= Name
[11] == '3';
3054 bool IsMaskZ
= Name
[11] == 'z';
3055 // Drop the "avx512.mask." to make it easier.
3056 Name
= Name
.drop_front(IsMask3
|| IsMaskZ
? 13 : 12);
3057 bool IsSubAdd
= Name
[3] == 's';
3058 if (CI
->getNumArgOperands() == 5 &&
3059 (!isa
<ConstantInt
>(CI
->getArgOperand(4)) ||
3060 cast
<ConstantInt
>(CI
->getArgOperand(4))->getZExtValue() != 4)) {
3062 // Check the character before ".512" in string.
3063 if (Name
[Name
.size()-5] == 's')
3064 IID
= Intrinsic::x86_avx512_vfmaddsub_ps_512
;
3066 IID
= Intrinsic::x86_avx512_vfmaddsub_pd_512
;
3068 Value
*Ops
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3069 CI
->getArgOperand(2), CI
->getArgOperand(4) };
3071 Ops
[2] = Builder
.CreateFNeg(Ops
[2]);
3073 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(), IID
),
3074 {CI
->getArgOperand(0), CI
->getArgOperand(1),
3075 CI
->getArgOperand(2), CI
->getArgOperand(4)});
3077 int NumElts
= CI
->getType()->getVectorNumElements();
3079 Value
*Ops
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3080 CI
->getArgOperand(2) };
3082 Function
*FMA
= Intrinsic::getDeclaration(CI
->getModule(), Intrinsic::fma
,
3084 Value
*Odd
= Builder
.CreateCall(FMA
, Ops
);
3085 Ops
[2] = Builder
.CreateFNeg(Ops
[2]);
3086 Value
*Even
= Builder
.CreateCall(FMA
, Ops
);
3089 std::swap(Even
, Odd
);
3091 SmallVector
<uint32_t, 32> Idxs(NumElts
);
3092 for (int i
= 0; i
!= NumElts
; ++i
)
3093 Idxs
[i
] = i
+ (i
% 2) * NumElts
;
3095 Rep
= Builder
.CreateShuffleVector(Even
, Odd
, Idxs
);
3098 Value
*PassThru
= IsMaskZ
? llvm::Constant::getNullValue(CI
->getType()) :
3099 IsMask3
? CI
->getArgOperand(2) :
3100 CI
->getArgOperand(0);
3102 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
, PassThru
);
3103 } else if (IsX86
&& (Name
.startswith("avx512.mask.pternlog.") ||
3104 Name
.startswith("avx512.maskz.pternlog."))) {
3105 bool ZeroMask
= Name
[11] == 'z';
3106 unsigned VecWidth
= CI
->getType()->getPrimitiveSizeInBits();
3107 unsigned EltWidth
= CI
->getType()->getScalarSizeInBits();
3109 if (VecWidth
== 128 && EltWidth
== 32)
3110 IID
= Intrinsic::x86_avx512_pternlog_d_128
;
3111 else if (VecWidth
== 256 && EltWidth
== 32)
3112 IID
= Intrinsic::x86_avx512_pternlog_d_256
;
3113 else if (VecWidth
== 512 && EltWidth
== 32)
3114 IID
= Intrinsic::x86_avx512_pternlog_d_512
;
3115 else if (VecWidth
== 128 && EltWidth
== 64)
3116 IID
= Intrinsic::x86_avx512_pternlog_q_128
;
3117 else if (VecWidth
== 256 && EltWidth
== 64)
3118 IID
= Intrinsic::x86_avx512_pternlog_q_256
;
3119 else if (VecWidth
== 512 && EltWidth
== 64)
3120 IID
= Intrinsic::x86_avx512_pternlog_q_512
;
3122 llvm_unreachable("Unexpected intrinsic");
3124 Value
*Args
[] = { CI
->getArgOperand(0) , CI
->getArgOperand(1),
3125 CI
->getArgOperand(2), CI
->getArgOperand(3) };
3126 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(), IID
),
3128 Value
*PassThru
= ZeroMask
? ConstantAggregateZero::get(CI
->getType())
3129 : CI
->getArgOperand(0);
3130 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(4), Rep
, PassThru
);
3131 } else if (IsX86
&& (Name
.startswith("avx512.mask.vpmadd52") ||
3132 Name
.startswith("avx512.maskz.vpmadd52"))) {
3133 bool ZeroMask
= Name
[11] == 'z';
3134 bool High
= Name
[20] == 'h' || Name
[21] == 'h';
3135 unsigned VecWidth
= CI
->getType()->getPrimitiveSizeInBits();
3137 if (VecWidth
== 128 && !High
)
3138 IID
= Intrinsic::x86_avx512_vpmadd52l_uq_128
;
3139 else if (VecWidth
== 256 && !High
)
3140 IID
= Intrinsic::x86_avx512_vpmadd52l_uq_256
;
3141 else if (VecWidth
== 512 && !High
)
3142 IID
= Intrinsic::x86_avx512_vpmadd52l_uq_512
;
3143 else if (VecWidth
== 128 && High
)
3144 IID
= Intrinsic::x86_avx512_vpmadd52h_uq_128
;
3145 else if (VecWidth
== 256 && High
)
3146 IID
= Intrinsic::x86_avx512_vpmadd52h_uq_256
;
3147 else if (VecWidth
== 512 && High
)
3148 IID
= Intrinsic::x86_avx512_vpmadd52h_uq_512
;
3150 llvm_unreachable("Unexpected intrinsic");
3152 Value
*Args
[] = { CI
->getArgOperand(0) , CI
->getArgOperand(1),
3153 CI
->getArgOperand(2) };
3154 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(), IID
),
3156 Value
*PassThru
= ZeroMask
? ConstantAggregateZero::get(CI
->getType())
3157 : CI
->getArgOperand(0);
3158 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
, PassThru
);
3159 } else if (IsX86
&& (Name
.startswith("avx512.mask.vpermi2var.") ||
3160 Name
.startswith("avx512.mask.vpermt2var.") ||
3161 Name
.startswith("avx512.maskz.vpermt2var."))) {
3162 bool ZeroMask
= Name
[11] == 'z';
3163 bool IndexForm
= Name
[17] == 'i';
3164 unsigned VecWidth
= CI
->getType()->getPrimitiveSizeInBits();
3165 unsigned EltWidth
= CI
->getType()->getScalarSizeInBits();
3166 bool IsFloat
= CI
->getType()->isFPOrFPVectorTy();
3168 if (VecWidth
== 128 && EltWidth
== 32 && IsFloat
)
3169 IID
= Intrinsic::x86_avx512_vpermi2var_ps_128
;
3170 else if (VecWidth
== 128 && EltWidth
== 32 && !IsFloat
)
3171 IID
= Intrinsic::x86_avx512_vpermi2var_d_128
;
3172 else if (VecWidth
== 128 && EltWidth
== 64 && IsFloat
)
3173 IID
= Intrinsic::x86_avx512_vpermi2var_pd_128
;
3174 else if (VecWidth
== 128 && EltWidth
== 64 && !IsFloat
)
3175 IID
= Intrinsic::x86_avx512_vpermi2var_q_128
;
3176 else if (VecWidth
== 256 && EltWidth
== 32 && IsFloat
)
3177 IID
= Intrinsic::x86_avx512_vpermi2var_ps_256
;
3178 else if (VecWidth
== 256 && EltWidth
== 32 && !IsFloat
)
3179 IID
= Intrinsic::x86_avx512_vpermi2var_d_256
;
3180 else if (VecWidth
== 256 && EltWidth
== 64 && IsFloat
)
3181 IID
= Intrinsic::x86_avx512_vpermi2var_pd_256
;
3182 else if (VecWidth
== 256 && EltWidth
== 64 && !IsFloat
)
3183 IID
= Intrinsic::x86_avx512_vpermi2var_q_256
;
3184 else if (VecWidth
== 512 && EltWidth
== 32 && IsFloat
)
3185 IID
= Intrinsic::x86_avx512_vpermi2var_ps_512
;
3186 else if (VecWidth
== 512 && EltWidth
== 32 && !IsFloat
)
3187 IID
= Intrinsic::x86_avx512_vpermi2var_d_512
;
3188 else if (VecWidth
== 512 && EltWidth
== 64 && IsFloat
)
3189 IID
= Intrinsic::x86_avx512_vpermi2var_pd_512
;
3190 else if (VecWidth
== 512 && EltWidth
== 64 && !IsFloat
)
3191 IID
= Intrinsic::x86_avx512_vpermi2var_q_512
;
3192 else if (VecWidth
== 128 && EltWidth
== 16)
3193 IID
= Intrinsic::x86_avx512_vpermi2var_hi_128
;
3194 else if (VecWidth
== 256 && EltWidth
== 16)
3195 IID
= Intrinsic::x86_avx512_vpermi2var_hi_256
;
3196 else if (VecWidth
== 512 && EltWidth
== 16)
3197 IID
= Intrinsic::x86_avx512_vpermi2var_hi_512
;
3198 else if (VecWidth
== 128 && EltWidth
== 8)
3199 IID
= Intrinsic::x86_avx512_vpermi2var_qi_128
;
3200 else if (VecWidth
== 256 && EltWidth
== 8)
3201 IID
= Intrinsic::x86_avx512_vpermi2var_qi_256
;
3202 else if (VecWidth
== 512 && EltWidth
== 8)
3203 IID
= Intrinsic::x86_avx512_vpermi2var_qi_512
;
3205 llvm_unreachable("Unexpected intrinsic");
3207 Value
*Args
[] = { CI
->getArgOperand(0) , CI
->getArgOperand(1),
3208 CI
->getArgOperand(2) };
3210 // If this isn't index form we need to swap operand 0 and 1.
3212 std::swap(Args
[0], Args
[1]);
3214 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(), IID
),
3216 Value
*PassThru
= ZeroMask
? ConstantAggregateZero::get(CI
->getType())
3217 : Builder
.CreateBitCast(CI
->getArgOperand(1),
3219 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
, PassThru
);
3220 } else if (IsX86
&& (Name
.startswith("avx512.mask.vpdpbusd.") ||
3221 Name
.startswith("avx512.maskz.vpdpbusd.") ||
3222 Name
.startswith("avx512.mask.vpdpbusds.") ||
3223 Name
.startswith("avx512.maskz.vpdpbusds."))) {
3224 bool ZeroMask
= Name
[11] == 'z';
3225 bool IsSaturating
= Name
[ZeroMask
? 21 : 20] == 's';
3226 unsigned VecWidth
= CI
->getType()->getPrimitiveSizeInBits();
3228 if (VecWidth
== 128 && !IsSaturating
)
3229 IID
= Intrinsic::x86_avx512_vpdpbusd_128
;
3230 else if (VecWidth
== 256 && !IsSaturating
)
3231 IID
= Intrinsic::x86_avx512_vpdpbusd_256
;
3232 else if (VecWidth
== 512 && !IsSaturating
)
3233 IID
= Intrinsic::x86_avx512_vpdpbusd_512
;
3234 else if (VecWidth
== 128 && IsSaturating
)
3235 IID
= Intrinsic::x86_avx512_vpdpbusds_128
;
3236 else if (VecWidth
== 256 && IsSaturating
)
3237 IID
= Intrinsic::x86_avx512_vpdpbusds_256
;
3238 else if (VecWidth
== 512 && IsSaturating
)
3239 IID
= Intrinsic::x86_avx512_vpdpbusds_512
;
3241 llvm_unreachable("Unexpected intrinsic");
3243 Value
*Args
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3244 CI
->getArgOperand(2) };
3245 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(), IID
),
3247 Value
*PassThru
= ZeroMask
? ConstantAggregateZero::get(CI
->getType())
3248 : CI
->getArgOperand(0);
3249 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
, PassThru
);
3250 } else if (IsX86
&& (Name
.startswith("avx512.mask.vpdpwssd.") ||
3251 Name
.startswith("avx512.maskz.vpdpwssd.") ||
3252 Name
.startswith("avx512.mask.vpdpwssds.") ||
3253 Name
.startswith("avx512.maskz.vpdpwssds."))) {
3254 bool ZeroMask
= Name
[11] == 'z';
3255 bool IsSaturating
= Name
[ZeroMask
? 21 : 20] == 's';
3256 unsigned VecWidth
= CI
->getType()->getPrimitiveSizeInBits();
3258 if (VecWidth
== 128 && !IsSaturating
)
3259 IID
= Intrinsic::x86_avx512_vpdpwssd_128
;
3260 else if (VecWidth
== 256 && !IsSaturating
)
3261 IID
= Intrinsic::x86_avx512_vpdpwssd_256
;
3262 else if (VecWidth
== 512 && !IsSaturating
)
3263 IID
= Intrinsic::x86_avx512_vpdpwssd_512
;
3264 else if (VecWidth
== 128 && IsSaturating
)
3265 IID
= Intrinsic::x86_avx512_vpdpwssds_128
;
3266 else if (VecWidth
== 256 && IsSaturating
)
3267 IID
= Intrinsic::x86_avx512_vpdpwssds_256
;
3268 else if (VecWidth
== 512 && IsSaturating
)
3269 IID
= Intrinsic::x86_avx512_vpdpwssds_512
;
3271 llvm_unreachable("Unexpected intrinsic");
3273 Value
*Args
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3274 CI
->getArgOperand(2) };
3275 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(), IID
),
3277 Value
*PassThru
= ZeroMask
? ConstantAggregateZero::get(CI
->getType())
3278 : CI
->getArgOperand(0);
3279 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
, PassThru
);
3280 } else if (IsX86
&& Name
.startswith("avx512.mask.") &&
3281 upgradeAVX512MaskToSelect(Name
, Builder
, *CI
, Rep
)) {
3282 // Rep will be updated by the call in the condition.
3283 } else if (IsNVVM
&& (Name
== "abs.i" || Name
== "abs.ll")) {
3284 Value
*Arg
= CI
->getArgOperand(0);
3285 Value
*Neg
= Builder
.CreateNeg(Arg
, "neg");
3286 Value
*Cmp
= Builder
.CreateICmpSGE(
3287 Arg
, llvm::Constant::getNullValue(Arg
->getType()), "abs.cond");
3288 Rep
= Builder
.CreateSelect(Cmp
, Arg
, Neg
, "abs");
3289 } else if (IsNVVM
&& (Name
== "max.i" || Name
== "max.ll" ||
3290 Name
== "max.ui" || Name
== "max.ull")) {
3291 Value
*Arg0
= CI
->getArgOperand(0);
3292 Value
*Arg1
= CI
->getArgOperand(1);
3293 Value
*Cmp
= Name
.endswith(".ui") || Name
.endswith(".ull")
3294 ? Builder
.CreateICmpUGE(Arg0
, Arg1
, "max.cond")
3295 : Builder
.CreateICmpSGE(Arg0
, Arg1
, "max.cond");
3296 Rep
= Builder
.CreateSelect(Cmp
, Arg0
, Arg1
, "max");
3297 } else if (IsNVVM
&& (Name
== "min.i" || Name
== "min.ll" ||
3298 Name
== "min.ui" || Name
== "min.ull")) {
3299 Value
*Arg0
= CI
->getArgOperand(0);
3300 Value
*Arg1
= CI
->getArgOperand(1);
3301 Value
*Cmp
= Name
.endswith(".ui") || Name
.endswith(".ull")
3302 ? Builder
.CreateICmpULE(Arg0
, Arg1
, "min.cond")
3303 : Builder
.CreateICmpSLE(Arg0
, Arg1
, "min.cond");
3304 Rep
= Builder
.CreateSelect(Cmp
, Arg0
, Arg1
, "min");
3305 } else if (IsNVVM
&& Name
== "clz.ll") {
3306 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 and returns an i64.
3307 Value
*Arg
= CI
->getArgOperand(0);
3308 Value
*Ctlz
= Builder
.CreateCall(
3309 Intrinsic::getDeclaration(F
->getParent(), Intrinsic::ctlz
,
3311 {Arg
, Builder
.getFalse()}, "ctlz");
3312 Rep
= Builder
.CreateTrunc(Ctlz
, Builder
.getInt32Ty(), "ctlz.trunc");
3313 } else if (IsNVVM
&& Name
== "popc.ll") {
3314 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 and returns an
3316 Value
*Arg
= CI
->getArgOperand(0);
3317 Value
*Popc
= Builder
.CreateCall(
3318 Intrinsic::getDeclaration(F
->getParent(), Intrinsic::ctpop
,
3321 Rep
= Builder
.CreateTrunc(Popc
, Builder
.getInt32Ty(), "ctpop.trunc");
3322 } else if (IsNVVM
&& Name
== "h2f") {
3323 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(
3324 F
->getParent(), Intrinsic::convert_from_fp16
,
3325 {Builder
.getFloatTy()}),
3326 CI
->getArgOperand(0), "h2f");
3328 llvm_unreachable("Unknown function for CallInst upgrade.");
3332 CI
->replaceAllUsesWith(Rep
);
3333 CI
->eraseFromParent();
3337 const auto &DefaultCase
= [&NewFn
, &CI
]() -> void {
3338 // Handle generic mangling change, but nothing else
3340 (CI
->getCalledFunction()->getName() != NewFn
->getName()) &&
3341 "Unknown function for CallInst upgrade and isn't just a name change");
3342 CI
->setCalledFunction(NewFn
);
3344 CallInst
*NewCall
= nullptr;
3345 switch (NewFn
->getIntrinsicID()) {
3351 case Intrinsic::arm_neon_vld1
:
3352 case Intrinsic::arm_neon_vld2
:
3353 case Intrinsic::arm_neon_vld3
:
3354 case Intrinsic::arm_neon_vld4
:
3355 case Intrinsic::arm_neon_vld2lane
:
3356 case Intrinsic::arm_neon_vld3lane
:
3357 case Intrinsic::arm_neon_vld4lane
:
3358 case Intrinsic::arm_neon_vst1
:
3359 case Intrinsic::arm_neon_vst2
:
3360 case Intrinsic::arm_neon_vst3
:
3361 case Intrinsic::arm_neon_vst4
:
3362 case Intrinsic::arm_neon_vst2lane
:
3363 case Intrinsic::arm_neon_vst3lane
:
3364 case Intrinsic::arm_neon_vst4lane
: {
3365 SmallVector
<Value
*, 4> Args(CI
->arg_operands().begin(),
3366 CI
->arg_operands().end());
3367 NewCall
= Builder
.CreateCall(NewFn
, Args
);
3371 case Intrinsic::bitreverse
:
3372 NewCall
= Builder
.CreateCall(NewFn
, {CI
->getArgOperand(0)});
3375 case Intrinsic::ctlz
:
3376 case Intrinsic::cttz
:
3377 assert(CI
->getNumArgOperands() == 1 &&
3378 "Mismatch between function args and call args");
3380 Builder
.CreateCall(NewFn
, {CI
->getArgOperand(0), Builder
.getFalse()});
3383 case Intrinsic::objectsize
: {
3384 Value
*NullIsUnknownSize
= CI
->getNumArgOperands() == 2
3385 ? Builder
.getFalse()
3386 : CI
->getArgOperand(2);
3387 NewCall
= Builder
.CreateCall(
3388 NewFn
, {CI
->getArgOperand(0), CI
->getArgOperand(1), NullIsUnknownSize
});
3392 case Intrinsic::ctpop
:
3393 NewCall
= Builder
.CreateCall(NewFn
, {CI
->getArgOperand(0)});
3396 case Intrinsic::convert_from_fp16
:
3397 NewCall
= Builder
.CreateCall(NewFn
, {CI
->getArgOperand(0)});
3400 case Intrinsic::dbg_value
:
3401 // Upgrade from the old version that had an extra offset argument.
3402 assert(CI
->getNumArgOperands() == 4);
3403 // Drop nonzero offsets instead of attempting to upgrade them.
3404 if (auto *Offset
= dyn_cast_or_null
<Constant
>(CI
->getArgOperand(1)))
3405 if (Offset
->isZeroValue()) {
3406 NewCall
= Builder
.CreateCall(
3408 {CI
->getArgOperand(0), CI
->getArgOperand(2), CI
->getArgOperand(3)});
3411 CI
->eraseFromParent();
3414 case Intrinsic::x86_xop_vfrcz_ss
:
3415 case Intrinsic::x86_xop_vfrcz_sd
:
3416 NewCall
= Builder
.CreateCall(NewFn
, {CI
->getArgOperand(1)});
3419 case Intrinsic::x86_xop_vpermil2pd
:
3420 case Intrinsic::x86_xop_vpermil2ps
:
3421 case Intrinsic::x86_xop_vpermil2pd_256
:
3422 case Intrinsic::x86_xop_vpermil2ps_256
: {
3423 SmallVector
<Value
*, 4> Args(CI
->arg_operands().begin(),
3424 CI
->arg_operands().end());
3425 VectorType
*FltIdxTy
= cast
<VectorType
>(Args
[2]->getType());
3426 VectorType
*IntIdxTy
= VectorType::getInteger(FltIdxTy
);
3427 Args
[2] = Builder
.CreateBitCast(Args
[2], IntIdxTy
);
3428 NewCall
= Builder
.CreateCall(NewFn
, Args
);
3432 case Intrinsic::x86_sse41_ptestc
:
3433 case Intrinsic::x86_sse41_ptestz
:
3434 case Intrinsic::x86_sse41_ptestnzc
: {
3435 // The arguments for these intrinsics used to be v4f32, and changed
3436 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3437 // So, the only thing required is a bitcast for both arguments.
3438 // First, check the arguments have the old type.
3439 Value
*Arg0
= CI
->getArgOperand(0);
3440 if (Arg0
->getType() != VectorType::get(Type::getFloatTy(C
), 4))
3443 // Old intrinsic, add bitcasts
3444 Value
*Arg1
= CI
->getArgOperand(1);
3446 Type
*NewVecTy
= VectorType::get(Type::getInt64Ty(C
), 2);
3448 Value
*BC0
= Builder
.CreateBitCast(Arg0
, NewVecTy
, "cast");
3449 Value
*BC1
= Builder
.CreateBitCast(Arg1
, NewVecTy
, "cast");
3451 NewCall
= Builder
.CreateCall(NewFn
, {BC0
, BC1
});
3455 case Intrinsic::x86_rdtscp
: {
3456 // This used to take 1 arguments. If we have no arguments, it is already
3458 if (CI
->getNumOperands() == 0)
3461 NewCall
= Builder
.CreateCall(NewFn
);
3462 // Extract the second result and store it.
3463 Value
*Data
= Builder
.CreateExtractValue(NewCall
, 1);
3464 // Cast the pointer to the right type.
3465 Value
*Ptr
= Builder
.CreateBitCast(CI
->getArgOperand(0),
3466 llvm::PointerType::getUnqual(Data
->getType()));
3467 Builder
.CreateAlignedStore(Data
, Ptr
, 1);
3468 // Replace the original call result with the first result of the new call.
3469 Value
*TSC
= Builder
.CreateExtractValue(NewCall
, 0);
3471 std::string Name
= CI
->getName();
3472 if (!Name
.empty()) {
3473 CI
->setName(Name
+ ".old");
3474 NewCall
->setName(Name
);
3476 CI
->replaceAllUsesWith(TSC
);
3477 CI
->eraseFromParent();
3481 case Intrinsic::x86_addcarryx_u32
:
3482 case Intrinsic::x86_addcarryx_u64
:
3483 case Intrinsic::x86_addcarry_u32
:
3484 case Intrinsic::x86_addcarry_u64
:
3485 case Intrinsic::x86_subborrow_u32
:
3486 case Intrinsic::x86_subborrow_u64
: {
3487 // This used to take 4 arguments. If we only have 3 arguments its already
3489 if (CI
->getNumOperands() == 3)
3492 // Make a call with 3 operands.
3493 NewCall
= Builder
.CreateCall(NewFn
, { CI
->getArgOperand(0),
3494 CI
->getArgOperand(1),
3495 CI
->getArgOperand(2)});
3496 // Extract the second result and store it.
3497 Value
*Data
= Builder
.CreateExtractValue(NewCall
, 1);
3498 // Cast the pointer to the right type.
3499 Value
*Ptr
= Builder
.CreateBitCast(CI
->getArgOperand(3),
3500 llvm::PointerType::getUnqual(Data
->getType()));
3501 Builder
.CreateAlignedStore(Data
, Ptr
, 1);
3502 // Replace the original call result with the first result of the new call.
3503 Value
*CF
= Builder
.CreateExtractValue(NewCall
, 0);
3505 std::string Name
= CI
->getName();
3506 if (!Name
.empty()) {
3507 CI
->setName(Name
+ ".old");
3508 NewCall
->setName(Name
);
3510 CI
->replaceAllUsesWith(CF
);
3511 CI
->eraseFromParent();
3515 case Intrinsic::x86_sse41_insertps
:
3516 case Intrinsic::x86_sse41_dppd
:
3517 case Intrinsic::x86_sse41_dpps
:
3518 case Intrinsic::x86_sse41_mpsadbw
:
3519 case Intrinsic::x86_avx_dp_ps_256
:
3520 case Intrinsic::x86_avx2_mpsadbw
: {
3521 // Need to truncate the last argument from i32 to i8 -- this argument models
3522 // an inherently 8-bit immediate operand to these x86 instructions.
3523 SmallVector
<Value
*, 4> Args(CI
->arg_operands().begin(),
3524 CI
->arg_operands().end());
3526 // Replace the last argument with a trunc.
3527 Args
.back() = Builder
.CreateTrunc(Args
.back(), Type::getInt8Ty(C
), "trunc");
3528 NewCall
= Builder
.CreateCall(NewFn
, Args
);
3532 case Intrinsic::thread_pointer
: {
3533 NewCall
= Builder
.CreateCall(NewFn
, {});
3537 case Intrinsic::invariant_start
:
3538 case Intrinsic::invariant_end
:
3539 case Intrinsic::masked_load
:
3540 case Intrinsic::masked_store
:
3541 case Intrinsic::masked_gather
:
3542 case Intrinsic::masked_scatter
: {
3543 SmallVector
<Value
*, 4> Args(CI
->arg_operands().begin(),
3544 CI
->arg_operands().end());
3545 NewCall
= Builder
.CreateCall(NewFn
, Args
);
3549 case Intrinsic::memcpy
:
3550 case Intrinsic::memmove
:
3551 case Intrinsic::memset
: {
3552 // We have to make sure that the call signature is what we're expecting.
3553 // We only want to change the old signatures by removing the alignment arg:
3554 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
3555 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
3556 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3557 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
3558 // Note: i8*'s in the above can be any pointer type
3559 if (CI
->getNumArgOperands() != 5) {
3563 // Remove alignment argument (3), and add alignment attributes to the
3564 // dest/src pointers.
3565 Value
*Args
[4] = {CI
->getArgOperand(0), CI
->getArgOperand(1),
3566 CI
->getArgOperand(2), CI
->getArgOperand(4)};
3567 NewCall
= Builder
.CreateCall(NewFn
, Args
);
3568 auto *MemCI
= cast
<MemIntrinsic
>(NewCall
);
3569 // All mem intrinsics support dest alignment.
3570 const ConstantInt
*Align
= cast
<ConstantInt
>(CI
->getArgOperand(3));
3571 MemCI
->setDestAlignment(Align
->getZExtValue());
3572 // Memcpy/Memmove also support source alignment.
3573 if (auto *MTI
= dyn_cast
<MemTransferInst
>(MemCI
))
3574 MTI
->setSourceAlignment(Align
->getZExtValue());
3578 assert(NewCall
&& "Should have either set this variable or returned through "
3579 "the default case");
3580 std::string Name
= CI
->getName();
3581 if (!Name
.empty()) {
3582 CI
->setName(Name
+ ".old");
3583 NewCall
->setName(Name
);
3585 CI
->replaceAllUsesWith(NewCall
);
3586 CI
->eraseFromParent();
3589 void llvm::UpgradeCallsToIntrinsic(Function
*F
) {
3590 assert(F
&& "Illegal attempt to upgrade a non-existent intrinsic.");
3592 // Check if this function should be upgraded and get the replacement function
3595 if (UpgradeIntrinsicFunction(F
, NewFn
)) {
3596 // Replace all users of the old function with the new function or new
3597 // instructions. This is not a range loop because the call is deleted.
3598 for (auto UI
= F
->user_begin(), UE
= F
->user_end(); UI
!= UE
; )
3599 if (CallInst
*CI
= dyn_cast
<CallInst
>(*UI
++))
3600 UpgradeIntrinsicCall(CI
, NewFn
);
3602 // Remove old function, no longer used, from the module.
3603 F
->eraseFromParent();
3607 MDNode
*llvm::UpgradeTBAANode(MDNode
&MD
) {
3608 // Check if the tag uses struct-path aware TBAA format.
3609 if (isa
<MDNode
>(MD
.getOperand(0)) && MD
.getNumOperands() >= 3)
3612 auto &Context
= MD
.getContext();
3613 if (MD
.getNumOperands() == 3) {
3614 Metadata
*Elts
[] = {MD
.getOperand(0), MD
.getOperand(1)};
3615 MDNode
*ScalarType
= MDNode::get(Context
, Elts
);
3616 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
3617 Metadata
*Elts2
[] = {ScalarType
, ScalarType
,
3618 ConstantAsMetadata::get(
3619 Constant::getNullValue(Type::getInt64Ty(Context
))),
3621 return MDNode::get(Context
, Elts2
);
3623 // Create a MDNode <MD, MD, offset 0>
3624 Metadata
*Elts
[] = {&MD
, &MD
, ConstantAsMetadata::get(Constant::getNullValue(
3625 Type::getInt64Ty(Context
)))};
3626 return MDNode::get(Context
, Elts
);
3629 Instruction
*llvm::UpgradeBitCastInst(unsigned Opc
, Value
*V
, Type
*DestTy
,
3630 Instruction
*&Temp
) {
3631 if (Opc
!= Instruction::BitCast
)
3635 Type
*SrcTy
= V
->getType();
3636 if (SrcTy
->isPtrOrPtrVectorTy() && DestTy
->isPtrOrPtrVectorTy() &&
3637 SrcTy
->getPointerAddressSpace() != DestTy
->getPointerAddressSpace()) {
3638 LLVMContext
&Context
= V
->getContext();
3640 // We have no information about target data layout, so we assume that
3641 // the maximum pointer size is 64bit.
3642 Type
*MidTy
= Type::getInt64Ty(Context
);
3643 Temp
= CastInst::Create(Instruction::PtrToInt
, V
, MidTy
);
3645 return CastInst::Create(Instruction::IntToPtr
, Temp
, DestTy
);
3651 Value
*llvm::UpgradeBitCastExpr(unsigned Opc
, Constant
*C
, Type
*DestTy
) {
3652 if (Opc
!= Instruction::BitCast
)
3655 Type
*SrcTy
= C
->getType();
3656 if (SrcTy
->isPtrOrPtrVectorTy() && DestTy
->isPtrOrPtrVectorTy() &&
3657 SrcTy
->getPointerAddressSpace() != DestTy
->getPointerAddressSpace()) {
3658 LLVMContext
&Context
= C
->getContext();
3660 // We have no information about target data layout, so we assume that
3661 // the maximum pointer size is 64bit.
3662 Type
*MidTy
= Type::getInt64Ty(Context
);
3664 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C
, MidTy
),
3671 /// Check the debug info version number, if it is out-dated, drop the debug
3672 /// info. Return true if module is modified.
3673 bool llvm::UpgradeDebugInfo(Module
&M
) {
3674 unsigned Version
= getDebugMetadataVersionFromModule(M
);
3675 if (Version
== DEBUG_METADATA_VERSION
) {
3676 bool BrokenDebugInfo
= false;
3677 if (verifyModule(M
, &llvm::errs(), &BrokenDebugInfo
))
3678 report_fatal_error("Broken module found, compilation aborted!");
3679 if (!BrokenDebugInfo
)
3680 // Everything is ok.
3683 // Diagnose malformed debug info.
3684 DiagnosticInfoIgnoringInvalidDebugMetadata
Diag(M
);
3685 M
.getContext().diagnose(Diag
);
3688 bool Modified
= StripDebugInfo(M
);
3689 if (Modified
&& Version
!= DEBUG_METADATA_VERSION
) {
3690 // Diagnose a version mismatch.
3691 DiagnosticInfoDebugMetadataVersion
DiagVersion(M
, Version
);
3692 M
.getContext().diagnose(DiagVersion
);
3697 bool llvm::UpgradeRetainReleaseMarker(Module
&M
) {
3698 bool Changed
= false;
3699 NamedMDNode
*ModRetainReleaseMarker
=
3700 M
.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker");
3701 if (ModRetainReleaseMarker
) {
3702 MDNode
*Op
= ModRetainReleaseMarker
->getOperand(0);
3704 MDString
*ID
= dyn_cast_or_null
<MDString
>(Op
->getOperand(0));
3706 SmallVector
<StringRef
, 4> ValueComp
;
3707 ID
->getString().split(ValueComp
, "#");
3708 if (ValueComp
.size() == 2) {
3709 std::string NewValue
= ValueComp
[0].str() + ";" + ValueComp
[1].str();
3710 Metadata
*Ops
[1] = {MDString::get(M
.getContext(), NewValue
)};
3711 ModRetainReleaseMarker
->setOperand(0,
3712 MDNode::get(M
.getContext(), Ops
));
3721 bool llvm::UpgradeModuleFlags(Module
&M
) {
3722 NamedMDNode
*ModFlags
= M
.getModuleFlagsMetadata();
3726 bool HasObjCFlag
= false, HasClassProperties
= false, Changed
= false;
3727 for (unsigned I
= 0, E
= ModFlags
->getNumOperands(); I
!= E
; ++I
) {
3728 MDNode
*Op
= ModFlags
->getOperand(I
);
3729 if (Op
->getNumOperands() != 3)
3731 MDString
*ID
= dyn_cast_or_null
<MDString
>(Op
->getOperand(1));
3734 if (ID
->getString() == "Objective-C Image Info Version")
3736 if (ID
->getString() == "Objective-C Class Properties")
3737 HasClassProperties
= true;
3738 // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
3739 // field was Error and now they are Max.
3740 if (ID
->getString() == "PIC Level" || ID
->getString() == "PIE Level") {
3741 if (auto *Behavior
=
3742 mdconst::dyn_extract_or_null
<ConstantInt
>(Op
->getOperand(0))) {
3743 if (Behavior
->getLimitedValue() == Module::Error
) {
3744 Type
*Int32Ty
= Type::getInt32Ty(M
.getContext());
3745 Metadata
*Ops
[3] = {
3746 ConstantAsMetadata::get(ConstantInt::get(Int32Ty
, Module::Max
)),
3747 MDString::get(M
.getContext(), ID
->getString()),
3749 ModFlags
->setOperand(I
, MDNode::get(M
.getContext(), Ops
));
3754 // Upgrade Objective-C Image Info Section. Removed the whitespce in the
3755 // section name so that llvm-lto will not complain about mismatching
3756 // module flags that is functionally the same.
3757 if (ID
->getString() == "Objective-C Image Info Section") {
3758 if (auto *Value
= dyn_cast_or_null
<MDString
>(Op
->getOperand(2))) {
3759 SmallVector
<StringRef
, 4> ValueComp
;
3760 Value
->getString().split(ValueComp
, " ");
3761 if (ValueComp
.size() != 1) {
3762 std::string NewValue
;
3763 for (auto &S
: ValueComp
)
3764 NewValue
+= S
.str();
3765 Metadata
*Ops
[3] = {Op
->getOperand(0), Op
->getOperand(1),
3766 MDString::get(M
.getContext(), NewValue
)};
3767 ModFlags
->setOperand(I
, MDNode::get(M
.getContext(), Ops
));
3774 // "Objective-C Class Properties" is recently added for Objective-C. We
3775 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
3776 // flag of value 0, so we can correclty downgrade this flag when trying to
3777 // link an ObjC bitcode without this module flag with an ObjC bitcode with
3778 // this module flag.
3779 if (HasObjCFlag
&& !HasClassProperties
) {
3780 M
.addModuleFlag(llvm::Module::Override
, "Objective-C Class Properties",
3788 void llvm::UpgradeSectionAttributes(Module
&M
) {
3789 auto TrimSpaces
= [](StringRef Section
) -> std::string
{
3790 SmallVector
<StringRef
, 5> Components
;
3791 Section
.split(Components
, ',');
3793 SmallString
<32> Buffer
;
3794 raw_svector_ostream
OS(Buffer
);
3796 for (auto Component
: Components
)
3797 OS
<< ',' << Component
.trim();
3799 return OS
.str().substr(1);
3802 for (auto &GV
: M
.globals()) {
3803 if (!GV
.hasSection())
3806 StringRef Section
= GV
.getSection();
3808 if (!Section
.startswith("__DATA, __objc_catlist"))
3811 // __DATA, __objc_catlist, regular, no_dead_strip
3812 // __DATA,__objc_catlist,regular,no_dead_strip
3813 GV
.setSection(TrimSpaces(Section
));
3817 static bool isOldLoopArgument(Metadata
*MD
) {
3818 auto *T
= dyn_cast_or_null
<MDTuple
>(MD
);
3821 if (T
->getNumOperands() < 1)
3823 auto *S
= dyn_cast_or_null
<MDString
>(T
->getOperand(0));
3826 return S
->getString().startswith("llvm.vectorizer.");
3829 static MDString
*upgradeLoopTag(LLVMContext
&C
, StringRef OldTag
) {
3830 StringRef OldPrefix
= "llvm.vectorizer.";
3831 assert(OldTag
.startswith(OldPrefix
) && "Expected old prefix");
3833 if (OldTag
== "llvm.vectorizer.unroll")
3834 return MDString::get(C
, "llvm.loop.interleave.count");
3836 return MDString::get(
3837 C
, (Twine("llvm.loop.vectorize.") + OldTag
.drop_front(OldPrefix
.size()))
3841 static Metadata
*upgradeLoopArgument(Metadata
*MD
) {
3842 auto *T
= dyn_cast_or_null
<MDTuple
>(MD
);
3845 if (T
->getNumOperands() < 1)
3847 auto *OldTag
= dyn_cast_or_null
<MDString
>(T
->getOperand(0));
3850 if (!OldTag
->getString().startswith("llvm.vectorizer."))
3853 // This has an old tag. Upgrade it.
3854 SmallVector
<Metadata
*, 8> Ops
;
3855 Ops
.reserve(T
->getNumOperands());
3856 Ops
.push_back(upgradeLoopTag(T
->getContext(), OldTag
->getString()));
3857 for (unsigned I
= 1, E
= T
->getNumOperands(); I
!= E
; ++I
)
3858 Ops
.push_back(T
->getOperand(I
));
3860 return MDTuple::get(T
->getContext(), Ops
);
3863 MDNode
*llvm::upgradeInstructionLoopAttachment(MDNode
&N
) {
3864 auto *T
= dyn_cast
<MDTuple
>(&N
);
3868 if (none_of(T
->operands(), isOldLoopArgument
))
3871 SmallVector
<Metadata
*, 8> Ops
;
3872 Ops
.reserve(T
->getNumOperands());
3873 for (Metadata
*MD
: T
->operands())
3874 Ops
.push_back(upgradeLoopArgument(MD
));
3876 return MDTuple::get(T
->getContext(), Ops
);