//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"

using namespace llvm;

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
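
// For example, a module that still declares
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// has that declaration renamed to "@llvm.x86.sse41.ptestc.old", and NewFn
// receives the current <2 x i64> declaration; the call sites themselves are
// rewritten afterwards in UpgradeIntrinsicCall.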

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
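
// For example, the old @llvm.x86.sse41.insertps took its immediate as i32;
// the upgraded declaration takes i8, and UpgradeIntrinsicCall inserts a
// trunc at each call site to adapt the operand.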

static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name == "addcarryx.u32" || // Added in 8.0
      Name == "addcarryx.u64" || // Added in 8.0
      Name == "addcarry.u32" || // Added in 8.0
      Name == "addcarry.u64" || // Added in 8.0
      Name == "subborrow.u32" || // Added in 8.0
      Name == "subborrow.u64" || // Added in 8.0
      Name.startswith("sse2.padds.") || // Added in 8.0
      Name.startswith("sse2.psubs.") || // Added in 8.0
      Name.startswith("sse2.paddus.") || // Added in 8.0
      Name.startswith("sse2.psubus.") || // Added in 8.0
      Name.startswith("avx2.padds.") || // Added in 8.0
      Name.startswith("avx2.psubs.") || // Added in 8.0
      Name.startswith("avx2.paddus.") || // Added in 8.0
      Name.startswith("avx2.psubus.") || // Added in 8.0
      Name.startswith("avx512.padds.") || // Added in 8.0
      Name.startswith("avx512.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.padds.") || // Added in 8.0
      Name.startswith("avx512.mask.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.paddus.") || // Added in 8.0
      Name.startswith("avx512.mask.psubus.") || // Added in 8.0
      Name == "ssse3.pabs.b.128" || // Added in 6.0
      Name == "ssse3.pabs.w.128" || // Added in 6.0
      Name == "ssse3.pabs.d.128" || // Added in 6.0
      Name.startswith("fma4.vfmadd.s") || // Added in 7.0
      Name.startswith("fma.vfmadd.") || // Added in 7.0
      Name.startswith("fma.vfmsub.") || // Added in 7.0
      Name.startswith("fma.vfmaddsub.") || // Added in 7.0
      Name.startswith("fma.vfmsubadd.") || // Added in 7.0
      Name.startswith("fma.vfnmadd.") || // Added in 7.0
      Name.startswith("fma.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
      Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
      Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
      Name.startswith("avx512.kunpck") || // Added in 6.0
      Name.startswith("avx2.pabs.") || // Added in 6.0
      Name.startswith("avx512.mask.pabs.") || // Added in 6.0
      Name.startswith("avx512.broadcastm") || // Added in 6.0
      Name == "sse.sqrt.ss" || // Added in 7.0
      Name == "sse2.sqrt.sd" || // Added in 7.0
      Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
      Name.startswith("avx.sqrt.p") || // Added in 7.0
      Name.startswith("sse2.sqrt.p") || // Added in 7.0
      Name.startswith("sse.sqrt.p") || // Added in 7.0
      Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
      Name.startswith("sse2.pcmpeq.") || // Added in 3.1
      Name.startswith("sse2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx2.pcmpeq.") || // Added in 3.1
      Name.startswith("avx2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
      Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
      Name.startswith("avx.vperm2f128.") || // Added in 6.0
      Name == "avx2.vperm2i128" || // Added in 6.0
      Name == "sse.add.ss" || // Added in 4.0
      Name == "sse2.add.sd" || // Added in 4.0
      Name == "sse.sub.ss" || // Added in 4.0
      Name == "sse2.sub.sd" || // Added in 4.0
      Name == "sse.mul.ss" || // Added in 4.0
      Name == "sse2.mul.sd" || // Added in 4.0
      Name == "sse.div.ss" || // Added in 4.0
      Name == "sse2.div.sd" || // Added in 4.0
      Name == "sse41.pmaxsb" || // Added in 3.9
      Name == "sse2.pmaxs.w" || // Added in 3.9
      Name == "sse41.pmaxsd" || // Added in 3.9
      Name == "sse2.pmaxu.b" || // Added in 3.9
      Name == "sse41.pmaxuw" || // Added in 3.9
      Name == "sse41.pmaxud" || // Added in 3.9
      Name == "sse41.pminsb" || // Added in 3.9
      Name == "sse2.pmins.w" || // Added in 3.9
      Name == "sse41.pminsd" || // Added in 3.9
      Name == "sse2.pminu.b" || // Added in 3.9
      Name == "sse41.pminuw" || // Added in 3.9
      Name == "sse41.pminud" || // Added in 3.9
      Name == "avx512.kand.w" || // Added in 7.0
      Name == "avx512.kandn.w" || // Added in 7.0
      Name == "avx512.knot.w" || // Added in 7.0
      Name == "avx512.kor.w" || // Added in 7.0
      Name == "avx512.kxor.w" || // Added in 7.0
      Name == "avx512.kxnor.w" || // Added in 7.0
      Name == "avx512.kortestc.w" || // Added in 7.0
      Name == "avx512.kortestz.w" || // Added in 7.0
      Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
      Name.startswith("avx2.pmax") || // Added in 3.9
      Name.startswith("avx2.pmin") || // Added in 3.9
      Name.startswith("avx512.mask.pmax") || // Added in 4.0
      Name.startswith("avx512.mask.pmin") || // Added in 4.0
      Name.startswith("avx2.vbroadcast") || // Added in 3.8
      Name.startswith("avx2.pbroadcast") || // Added in 3.8
      Name.startswith("avx.vpermil.") || // Added in 3.1
      Name.startswith("sse2.pshuf") || // Added in 3.9
      Name.startswith("avx512.pbroadcast") || // Added in 3.9
      Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
      Name.startswith("avx512.mask.movddup") || // Added in 3.9
      Name.startswith("avx512.mask.movshdup") || // Added in 3.9
      Name.startswith("avx512.mask.movsldup") || // Added in 3.9
      Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
      Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
      Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
      Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
      Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
      Name.startswith("avx512.mask.punpckl") || // Added in 3.9
      Name.startswith("avx512.mask.punpckh") || // Added in 3.9
      Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
      Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
      Name.startswith("avx512.mask.pand.") || // Added in 3.9
      Name.startswith("avx512.mask.pandn.") || // Added in 3.9
      Name.startswith("avx512.mask.por.") || // Added in 3.9
      Name.startswith("avx512.mask.pxor.") || // Added in 3.9
      Name.startswith("avx512.mask.and.") || // Added in 3.9
      Name.startswith("avx512.mask.andn.") || // Added in 3.9
      Name.startswith("avx512.mask.or.") || // Added in 3.9
      Name.startswith("avx512.mask.xor.") || // Added in 3.9
      Name.startswith("avx512.mask.padd.") || // Added in 4.0
      Name.startswith("avx512.mask.psub.") || // Added in 4.0
      Name.startswith("avx512.mask.pmull.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
      Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
      Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
      Name == "avx512.cvtusi2sd" || // Added in 7.0
      Name.startswith("avx512.mask.permvar.") || // Added in 7.0
      Name == "sse2.pmulu.dq" || // Added in 7.0
      Name == "sse41.pmuldq" || // Added in 7.0
      Name == "avx2.pmulu.dq" || // Added in 7.0
      Name == "avx2.pmul.dq" || // Added in 7.0
      Name == "avx512.pmulu.dq.512" || // Added in 7.0
      Name == "avx512.pmul.dq.512" || // Added in 7.0
      Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
      Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
      Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
      Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
      Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
      Name.startswith("avx512.mask.psll.d") || // Added in 4.0
      Name.startswith("avx512.mask.psll.q") || // Added in 4.0
      Name.startswith("avx512.mask.psll.w") || // Added in 4.0
      Name.startswith("avx512.mask.psra.d") || // Added in 4.0
      Name.startswith("avx512.mask.psra.q") || // Added in 4.0
      Name.startswith("avx512.mask.psra.w") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
      Name.startswith("avx512.mask.pslli") || // Added in 4.0
      Name.startswith("avx512.mask.psrai") || // Added in 4.0
      Name.startswith("avx512.mask.psrli") || // Added in 4.0
      Name.startswith("avx512.mask.psllv") || // Added in 4.0
      Name.startswith("avx512.mask.psrav") || // Added in 4.0
      Name.startswith("avx512.mask.psrlv") || // Added in 4.0
      Name.startswith("sse41.pmovsx") || // Added in 3.8
      Name.startswith("sse41.pmovzx") || // Added in 3.9
      Name.startswith("avx2.pmovsx") || // Added in 3.9
      Name.startswith("avx2.pmovzx") || // Added in 3.9
      Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
      Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
      Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
      Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
      Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
      Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.vpshld.") || // Added in 8.0
      Name.startswith("avx512.vpshrd.") || // Added in 8.0
      Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
      Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
      Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
      Name.startswith("avx512.mask.conflict.") || // Added in 9.0
      Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
      Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
      Name == "sse.cvtsi2ss" || // Added in 7.0
      Name == "sse.cvtsi642ss" || // Added in 7.0
      Name == "sse2.cvtsi2sd" || // Added in 7.0
      Name == "sse2.cvtsi642sd" || // Added in 7.0
      Name == "sse2.cvtss2sd" || // Added in 7.0
      Name == "sse2.cvtdq2pd" || // Added in 3.9
      Name == "sse2.cvtdq2ps" || // Added in 7.0
      Name == "sse2.cvtps2pd" || // Added in 3.9
      Name == "avx.cvtdq2.pd.256" || // Added in 3.9
      Name == "avx.cvtdq2.ps.256" || // Added in 7.0
      Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
      Name.startswith("avx.vinsertf128.") || // Added in 3.7
      Name == "avx2.vinserti128" || // Added in 3.7
      Name.startswith("avx512.mask.insert") || // Added in 4.0
      Name.startswith("avx.vextractf128.") || // Added in 3.7
      Name == "avx2.vextracti128" || // Added in 3.7
      Name.startswith("avx512.mask.vextract") || // Added in 4.0
      Name.startswith("sse4a.movnt.") || // Added in 3.9
      Name.startswith("avx.movnt.") || // Added in 3.2
      Name.startswith("avx512.storent.") || // Added in 3.9
      Name == "sse41.movntdqa" || // Added in 5.0
      Name == "avx2.movntdqa" || // Added in 5.0
      Name == "avx512.movntdqa" || // Added in 5.0
      Name == "sse2.storel.dq" || // Added in 3.9
      Name.startswith("sse.storeu.") || // Added in 3.9
      Name.startswith("sse2.storeu.") || // Added in 3.9
      Name.startswith("avx.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.store.p") || // Added in 3.9
      Name.startswith("avx512.mask.store.b.") || // Added in 3.9
      Name.startswith("avx512.mask.store.w.") || // Added in 3.9
      Name.startswith("avx512.mask.store.d.") || // Added in 3.9
      Name.startswith("avx512.mask.store.q.") || // Added in 3.9
      Name == "avx512.mask.store.ss" || // Added in 7.0
      Name.startswith("avx512.mask.loadu.") || // Added in 3.9
      Name.startswith("avx512.mask.load.") || // Added in 3.9
      Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
      Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
      Name.startswith("avx512.mask.expand.b") || // Added in 9.0
      Name.startswith("avx512.mask.expand.w") || // Added in 9.0
      Name.startswith("avx512.mask.expand.d") || // Added in 9.0
      Name.startswith("avx512.mask.expand.q") || // Added in 9.0
      Name.startswith("avx512.mask.expand.p") || // Added in 9.0
      Name.startswith("avx512.mask.compress.b") || // Added in 9.0
      Name.startswith("avx512.mask.compress.w") || // Added in 9.0
      Name.startswith("avx512.mask.compress.d") || // Added in 9.0
      Name.startswith("avx512.mask.compress.q") || // Added in 9.0
      Name.startswith("avx512.mask.compress.p") || // Added in 9.0
      Name == "sse42.crc32.64.8" || // Added in 3.4
      Name.startswith("avx.vbroadcast.s") || // Added in 3.5
      Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
      Name.startswith("avx512.mask.palignr.") || // Added in 3.9
      Name.startswith("avx512.mask.valign.") || // Added in 4.0
      Name.startswith("sse2.psll.dq") || // Added in 3.7
      Name.startswith("sse2.psrl.dq") || // Added in 3.7
      Name.startswith("avx2.psll.dq") || // Added in 3.7
      Name.startswith("avx2.psrl.dq") || // Added in 3.7
      Name.startswith("avx512.psll.dq") || // Added in 3.9
      Name.startswith("avx512.psrl.dq") || // Added in 3.9
      Name == "sse41.pblendw" || // Added in 3.7
      Name.startswith("sse41.blendp") || // Added in 3.7
      Name.startswith("avx.blend.p") || // Added in 3.7
      Name == "avx2.pblendw" || // Added in 3.7
      Name.startswith("avx2.pblendd.") || // Added in 3.7
      Name.startswith("avx.vbroadcastf128") || // Added in 4.0
      Name == "avx2.vbroadcasti128" || // Added in 3.7
      Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
      Name == "xop.vpcmov" || // Added in 3.8
      Name == "xop.vpcmov.256" || // Added in 5.0
      Name.startswith("avx512.mask.move.s") || // Added in 4.0
      Name.startswith("avx512.cvtmask2") || // Added in 5.0
      Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
      Name.startswith("xop.vprot") || // Added in 8.0
      Name.startswith("avx512.prol") || // Added in 8.0
      Name.startswith("avx512.pror") || // Added in 8.0
      Name.startswith("avx512.mask.prorv.") || // Added in 8.0
      Name.startswith("avx512.mask.pror.") || // Added in 8.0
      Name.startswith("avx512.mask.prolv.") || // Added in 8.0
      Name.startswith("avx512.mask.prol.") || // Added in 8.0
      Name.startswith("avx512.ptestm") || // Added in 6.0
      Name.startswith("avx512.ptestnm") || // Added in 6.0
      Name.startswith("avx512.mask.pavg")) // Added in 6.0
    return true;

  return false;
}

static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.startswith("x86."))
    return false;
  // Remove "x86." prefix.
  Name = Name.substr(4);

  if (ShouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_rdtscp);
    return true;
  }

  // SSE4.1 ptest functions may have an old signature.
  if (Name.startswith("sse41.ptest")) { // Added in 3.2
    if (Name.substr(11) == "c")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name.substr(11) == "z")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name.substr(11) == "nzc")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.
  if (Name == "sse41.insertps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                            NewFn);
  if (Name == "sse41.dppd") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                            NewFn);
  if (Name == "sse41.dpps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                            NewFn);
  if (Name == "sse41.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                            NewFn);
  if (Name == "avx.dp.ps.256") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                            NewFn);
  if (Name == "avx2.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                            NewFn);

  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
    return true;
  }
  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
    return true;
  }

  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
    auto Idx = F->getFunctionType()->getParamType(2);
    if (Idx->isFPOrFPVectorTy()) {
      rename(F);
      unsigned IdxSize = Idx->getPrimitiveSizeInBits();
      unsigned EltSize = Idx->getScalarSizeInBits();
      Intrinsic::ID Permil2ID;
      if (EltSize == 64 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd;
      else if (EltSize == 32 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2ps;
      else if (EltSize == 64 && IdxSize == 256)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
      else
        Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
      NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
      return true;
    }
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Upgrade the intrinsic "clang.arc.use", which doesn't start with "llvm.".
  if (F->getName() == "clang.arc.use") {
    NewFn = nullptr;
    return true;
  }

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."

  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vclz")) {
      Type* args[2] = {
        F->arg_begin()->getType(),
        Type::getInt1Ty(F->getContext())
      };
      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
      // the end of the name. Change name from llvm.arm.neon.vclz.* to
      //  llvm.ctlz.*
      FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                               "llvm.ctlz." + Name.substr(14), F->getParent());
      return true;
    }
    if (Name.startswith("arm.neon.vcnt")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vldRegex.match(Name)) {
      auto fArgs = F->getFunctionType()->params();
      SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
      // Can't use Intrinsic::getDeclaration here as the return types might
      // then only be structurally equal.
      FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                               "llvm." + Name + ".p0i8", F->getParent());
      return true;
    }
    Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vstRegex.match(Name)) {
      static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                                Intrinsic::arm_neon_vst2,
                                                Intrinsic::arm_neon_vst3,
                                                Intrinsic::arm_neon_vst4};

      static const Intrinsic::ID StoreLaneInts[] = {
        Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
        Intrinsic::arm_neon_vst4lane
      };

      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
      if (Name.find("lane") == StringRef::npos)
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreInts[fArgs.size() - 3], Tys);
      else
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreLaneInts[fArgs.size() - 5], Tys);
      return true;
    }
    if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
      return true;
    }
    if (Name.startswith("aarch64.neon.addp")) {
      if (F->arg_size() != 2)
        break; // Invalid IR.
      auto fArgs = F->getFunctionType()->params();
      VectorType *ArgTy = dyn_cast<VectorType>(fArgs[0]);
      if (ArgTy && ArgTy->getElementType()->isFloatingPointTy()) {
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::aarch64_neon_faddp, fArgs);
        return true;
      }
    }
    break;
  }

  case 'c': {
    if (Name.startswith("ctlz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("cttz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                        F->arg_begin()->getType());
      return true;
    }
    break;
  }
  case 'd': {
    if (Name == "dbg.value" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
      return true;
    }
    break;
  }
  case 'i':
  case 'l': {
    bool IsLifetimeStart = Name.startswith("lifetime.start");
    if (IsLifetimeStart || Name.startswith("invariant.start")) {
      Intrinsic::ID ID = IsLifetimeStart ?
                  Intrinsic::lifetime_start : Intrinsic::invariant_start;
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[1]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }

    bool IsLifetimeEnd = Name.startswith("lifetime.end");
    if (IsLifetimeEnd || Name.startswith("invariant.end")) {
      Intrinsic::ID ID = IsLifetimeEnd ?
                  Intrinsic::lifetime_end : Intrinsic::invariant_end;

      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }
    if (Name.startswith("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group.
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
          Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }

    break;
  }
  case 'm': {
    if (Name.startswith("masked.load.")) {
      Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_load,
                                          Tys);
        return true;
      }
    }
    if (Name.startswith("masked.store.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = { Args[0], Args[1] };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_store,
                                          Tys);
        return true;
      }
    }
    // Renaming gather/scatter intrinsics with no address space overloading
    // to the new overload which includes an address space.
    if (Name.startswith("masked.gather.")) {
      Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_gather, Tys);
        return true;
      }
    }
    if (Name.startswith("masked.scatter.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {Args[0], Args[1]};
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_scatter, Tys);
        return true;
      }
    }
    // Updating the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to embedding the alignment as an attribute of
    // the pointer args.
    if (Name.startswith("memcpy.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, src, and len.
      ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
                                        ParamTypes);
      return true;
    }
    if (Name.startswith("memmove.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, src, and len.
      ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
                                        ParamTypes);
      return true;
    }
    if (Name.startswith("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest and len.
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
                                        ParamTypes);
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.startswith("nvvm.")) {
      Name = Name.substr(5);

      // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
      Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
                              .Cases("brev32", "brev64", Intrinsic::bitreverse)
                              .Case("clz.i", Intrinsic::ctlz)
                              .Case("popc.i", Intrinsic::ctpop)
                              .Default(Intrinsic::not_intrinsic);
      if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
                                          {F->getReturnType()});
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = StringSwitch<bool>(Name)
                        .Cases("abs.i", "abs.ll", true)
                        .Cases("clz.ll", "popc.ll", "h2f", true)
                        .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
                        .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
                        .Default(false);
      if (Expand) {
        NewFn = nullptr;
        return true;
      }
    }
    break;
  }
  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (Name.startswith("objectsize.")) {
      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->arg_size() == 2 || F->arg_size() == 3 ||
          F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
                                          Tys);
        return true;
      }
    }
    break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'x':
    if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }
  // Remangle our intrinsic since we upgrade the mangling.
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != None) {
    NewFn = Result.getValue();
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  NewFn = nullptr;
  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}
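
// A typical driver (cf. UpgradeCallsToIntrinsic) asks this predicate first
// and then rewrites every call site, roughly:
//
//   Function *NewFn;
//   if (UpgradeIntrinsicFunction(F, NewFn)) {
//     // Not a range-based for: upgrading a call erases it.
//     for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE;)
//       if (auto *CI = dyn_cast<CallInst>(*UI++))
//         UpgradeIntrinsicCall(CI, NewFn);
//     F->eraseFromParent();
//   }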

bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Nothing to do yet.
  return false;
}

// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
                                         Value *Op, unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
                     cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
    uint32_t Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask,
                                       makeArrayRef(Indices, NumElts),
                                       "extract");
  }

  return Mask;
}
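
// For example, an i8 mask guarding a 4-element operation becomes a <8 x i1>
// via the bitcast, and the shuffle then keeps only elements 0..3.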

static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
                            Value *Op0, Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
  return Builder.CreateSelect(Mask, Op0, Op1);
}

static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
                                  Value *Op0, Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  llvm::VectorType *MaskTy =
    llvm::VectorType::get(Builder.getInt1Ty(),
                          Mask->getType()->getIntegerBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);
  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
  return Builder.CreateSelect(Mask, Op0, Op1);
}

// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the
// immediate, so we need to handle both cases. VALIGN also doesn't have
// 128-bit lanes.
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = Op0->getType()->getVectorNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  uint32_t Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that.
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),
                                             "palignr");

  return EmitX86Select(Builder, Mask, Align, Passthru);
}
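
// For example, a 128-bit palignr with ShiftVal == 4 picks Indices == [4..19]:
// bytes 4..15 of Op1 followed by bytes 0..3 of Op0, i.e. the concatenation
// Op0:Op1 shifted right by four bytes.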

static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *Ty = CI.getType();
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  else
    llvm_unreachable("Unexpected intrinsic");

  Value *Args[] = { CI.getArgOperand(0), CI.getArgOperand(1),
                    CI.getArgOperand(2) };

  // If this isn't index form we need to swap operand 0 and 1.
  if (!IndexForm)
    std::swap(Args[0], Args[1]);

  Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                                Args);
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
                             : Builder.CreateBitCast(CI.getArgOperand(1),
                                                     Ty);
  return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}

static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
                                            bool IsSigned, bool IsAddition) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getOperand(0);
  Value *Op1 = CI.getOperand(1);

  Intrinsic::ID IID =
      IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
               : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});

  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
    Value *VecSrc = CI.getOperand(2);
    Value *Mask = CI.getOperand(3);
    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}

static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
                               bool IsRotateRight) {
  Type *Ty = CI.getType();
  Value *Src = CI.getArgOperand(0);
  Value *Amt = CI.getArgOperand(1);

  // Amount may be a scalar immediate, in which case create a splat vector.
  // Funnel shift amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = Ty->getVectorNumElements();
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  }

  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});

  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
    Value *VecSrc = CI.getOperand(2);
    Value *Mask = CI.getOperand(3);
    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}
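
// A rotate is just a funnel shift with both inputs equal: rotl(x, n) is
// fshl(x, x, n) and rotr(x, n) is fshr(x, x, n).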

static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
                              bool IsSigned) {
  Type *Ty = CI.getType();
  Value *LHS = CI.getArgOperand(0);
  Value *RHS = CI.getArgOperand(1);

  CmpInst::Predicate Pred;
  switch (Imm) {
  case 0x0:
    Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
    break;
  case 0x1:
    Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
    break;
  case 0x2:
    Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
    break;
  case 0x3:
    Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
    break;
  case 0x4:
    Pred = ICmpInst::ICMP_EQ;
    break;
  case 0x5:
    Pred = ICmpInst::ICMP_NE;
    break;
  case 0x6:
    return Constant::getNullValue(Ty); // FALSE
  case 0x7:
    return Constant::getAllOnesValue(Ty); // TRUE
  default:
    llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
  }

  Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
  Value *Ext = Builder.CreateSExt(Cmp, Ty);
  return Ext;
}

static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
                                    bool IsShiftRight, bool ZeroMask) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Amt = CI.getArgOperand(2);

  if (IsShiftRight)
    std::swap(Op0, Op1);

  // Amount may be a scalar immediate, in which case create a splat vector.
  // Funnel shift amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = Ty->getVectorNumElements();
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  }

  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});

  unsigned NumArgs = CI.getNumArgOperands();
  if (NumArgs >= 4) { // For masked intrinsics.
    Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
                    ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
                                   CI.getArgOperand(0);
    Value *Mask = CI.getOperand(NumArgs - 1);
    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}

static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
                                 Value *Ptr, Value *Data, Value *Mask,
                                 bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Data->getType()));
  unsigned Align =
      Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Data, Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Data->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
}

static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
                                Value *Ptr, Value *Passthru, Value *Mask,
                                bool Aligned) {
  Type *ValTy = Passthru->getType();
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
  unsigned Align =
      Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(ValTy, Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Passthru->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
}

static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
  Value *Op0 = CI.getArgOperand(0);
  llvm::Type *Ty = Op0->getType();
  Value *Zero = llvm::Constant::getNullValue(Ty);
  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
  Value *Neg = Builder.CreateNeg(Op0);
  Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);

  if (CI.getNumArgOperands() == 3)
    Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));

  return Res;
}

static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
                               ICmpInst::Predicate Pred) {
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
  Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);

  if (CI.getNumArgOperands() == 4)
    Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));

  return Res;
}

static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
  Type *Ty = CI.getType();

  // Arguments have a vXi32 type so cast to vXi64.
  Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
  Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);

  if (IsSigned) {
    // Shift left then arithmetic shift right.
    Constant *ShiftAmt = ConstantInt::get(Ty, 32);
    LHS = Builder.CreateShl(LHS, ShiftAmt);
    LHS = Builder.CreateAShr(LHS, ShiftAmt);
    RHS = Builder.CreateShl(RHS, ShiftAmt);
    RHS = Builder.CreateAShr(RHS, ShiftAmt);
  } else {
    // Clear the upper bits.
    Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
    LHS = Builder.CreateAnd(LHS, Mask);
    RHS = Builder.CreateAnd(RHS, Mask);
  }

  Value *Res = Builder.CreateMul(LHS, RHS);

  if (CI.getNumArgOperands() == 4)
    Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));

  return Res;
}
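
// The shl/ashr pair sign-extends the low 32 bits of each 64-bit lane in
// place (the even i32 elements of the original operands), matching pmuldq;
// the unsigned pmuludq variant instead masks the upper halves to zero.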

// Apply a mask to a vector of i1s, making sure the result is at least 8 bits
// wide.
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask) {
  unsigned NumElts = Vec->getType()->getVectorNumElements();
  if (Mask) {
    const auto *C = dyn_cast<Constant>(Mask);
    if (!C || !C->isAllOnesValue())
      Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  }

  if (NumElts < 8) {
    uint32_t Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(Vec,
                                      Constant::getNullValue(Vec->getType()),
                                      makeArrayRef(Indices, 8));
  }
  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}
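
// For example, a 4-element compare result is padded with four zero lanes up
// to <8 x i1> before the final bitcast to i8.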

static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
                                   unsigned CC, bool Signed) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = Op0->getType()->getVectorNumElements();

  Value *Cmp;
  if (CC == 3) {
    Cmp = Constant::getNullValue(
        llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    Cmp = Constant::getAllOnesValue(
        llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ;  break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE;  break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
  }

  Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);

  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
}
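
// CC uses the AVX-512 integer-compare immediate encoding: 0 = eq, 1 = lt,
// 2 = le, 3 = always-false, 4 = ne, 5 = ge, 6 = gt, 7 = always-true.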

// Replace a masked intrinsic with an older unmasked intrinsic.
static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
                                    Intrinsic::ID IID) {
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
  Value *Rep = Builder.CreateCall(Intrin,
                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
  return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
}

static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
  Value *A = CI.getArgOperand(0);
  Value *B = CI.getArgOperand(1);
  Value *Src = CI.getArgOperand(2);
  Value *Mask = CI.getArgOperand(3);

  Value *AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
  Value *Cmp = Builder.CreateIsNotNull(AndNode);
  Value *Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
  Value *Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
  Value *Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
}
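
// Only bit 0 of the mask matters: element 0 of the result comes from B when
// that bit is set and from Src otherwise, while the remaining elements are
// taken from A unchanged.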

static Value *UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
  Value *Op = CI.getArgOperand(0);
  Type *ReturnOp = CI.getType();
  unsigned NumElts = CI.getType()->getVectorNumElements();
  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
}

// Replace intrinsic with unmasked version and a select.
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
                                      CallInst &CI, Value *&Rep) {
  Name = Name.substr(12); // Remove avx512.mask.

  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (Name.startswith("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.startswith("permvar.")) {
    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
1532 } else if (Name
.startswith("conflict.")) {
1533 if (Name
[9] == 'd' && VecWidth
== 128)
1534 IID
= Intrinsic::x86_avx512_conflict_d_128
;
1535 else if (Name
[9] == 'd' && VecWidth
== 256)
1536 IID
= Intrinsic::x86_avx512_conflict_d_256
;
1537 else if (Name
[9] == 'd' && VecWidth
== 512)
1538 IID
= Intrinsic::x86_avx512_conflict_d_512
;
1539 else if (Name
[9] == 'q' && VecWidth
== 128)
1540 IID
= Intrinsic::x86_avx512_conflict_q_128
;
1541 else if (Name
[9] == 'q' && VecWidth
== 256)
1542 IID
= Intrinsic::x86_avx512_conflict_q_256
;
1543 else if (Name
[9] == 'q' && VecWidth
== 512)
1544 IID
= Intrinsic::x86_avx512_conflict_q_512
;
1546 llvm_unreachable("Unexpected intrinsic");
1547 } else if (Name
.startswith("pavg.")) {
1548 if (Name
[5] == 'b' && VecWidth
== 128)
1549 IID
= Intrinsic::x86_sse2_pavg_b
;
1550 else if (Name
[5] == 'b' && VecWidth
== 256)
1551 IID
= Intrinsic::x86_avx2_pavg_b
;
1552 else if (Name
[5] == 'b' && VecWidth
== 512)
1553 IID
= Intrinsic::x86_avx512_pavg_b_512
;
1554 else if (Name
[5] == 'w' && VecWidth
== 128)
1555 IID
= Intrinsic::x86_sse2_pavg_w
;
1556 else if (Name
[5] == 'w' && VecWidth
== 256)
1557 IID
= Intrinsic::x86_avx2_pavg_w
;
1558 else if (Name
[5] == 'w' && VecWidth
== 512)
1559 IID
= Intrinsic::x86_avx512_pavg_w_512
;
1561 llvm_unreachable("Unexpected intrinsic");
1565 SmallVector
<Value
*, 4> Args(CI
.arg_operands().begin(),
1566 CI
.arg_operands().end());
1569 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
.getModule(), IID
),
1571 unsigned NumArgs
= CI
.getNumArgOperands();
1572 Rep
= EmitX86Select(Builder
, CI
.getArgOperand(NumArgs
- 1), Rep
,
1573 CI
.getArgOperand(NumArgs
- 2));
/// Upgrade comment in call to inline asm that represents an objc retain
/// release marker.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  size_t Pos;
  if (AsmStr->find("mov\tfp") == 0 &&
      AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
      (Pos = AsmStr->find("# marker")) != std::string::npos) {
    AsmStr->replace(Pos, 1, ";");
  }
}

/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  Function *F = CI->getCalledFunction();
  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());

  assert(F && "Intrinsic call is not direct?");

  if (!NewFn) {
    // Get the Function's name.
    StringRef Name = F->getName();

    // clang.arc.use is an old name for llvm.arc.clang.arc.use. It was dropped
    // from the upgrader because the optimizer now only recognizes intrinsics
    // for ARC runtime calls.
    if (Name == "clang.arc.use") {
      CI->eraseFromParent();
      return;
    }

    assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
    Name = Name.substr(5);

    bool IsX86 = Name.startswith("x86.");
    if (IsX86)
      Name = Name.substr(4);
    bool IsNVVM = Name.startswith("nvvm.");
    if (IsNVVM)
      Name = Name.substr(5);

    if (IsX86 && Name.startswith("sse4a.movnt.")) {
      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Nontemporal (unaligned) store of the 0'th element of the float/double
      // vector.
      Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
      PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
      Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
      Value *Extract =
          Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");

      StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    if (IsX86 && (Name.startswith("avx.movnt.") ||
                  Name.startswith("avx512.storent."))) {
      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Arg1->getType()),
                                        "cast");
      VectorType *VTy = cast<VectorType>(Arg1->getType());
      StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
                                                 VTy->getBitWidth() / 8);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    if (IsX86 && Name == "sse2.storel.dq") {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
      Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
      Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Elt->getType()),
                                        "cast");
      Builder.CreateAlignedStore(Elt, BC, 1);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    if (IsX86 && (Name.startswith("sse.storeu.") ||
                  Name.startswith("sse2.storeu.") ||
                  Name.startswith("avx.storeu."))) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      Arg0 = Builder.CreateBitCast(Arg0,
                                   PointerType::getUnqual(Arg1->getType()),
                                   "cast");
      Builder.CreateAlignedStore(Arg1, Arg0, 1);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    if (IsX86 && Name == "avx512.mask.store.ss") {
      Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
      UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
                         Mask, false);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    if (IsX86 && (Name.startswith("avx512.mask.store"))) {
      // "avx512.mask.storeu." or "avx512.mask.store."
      bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
      UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
                         CI->getArgOperand(2), Aligned);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    Value *Rep;
    // Upgrade packed integer vector compare intrinsics to compare instructions.
    if (IsX86 && (Name.startswith("sse2.pcmp") ||
                  Name.startswith("avx2.pcmp"))) {
      // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
      bool CmpEq = Name[9] == 'e';
      Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
                               CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
      Type *ExtTy = Type::getInt32Ty(C);
      if (CI->getOperand(0)->getType()->isIntegerTy(8))
        ExtTy = Type::getInt64Ty(C);
      unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
                         ExtTy->getPrimitiveSizeInBits();
      Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
      Rep = Builder.CreateVectorSplat(NumElts, Rep);
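      // The byte-mask flavors (broadcastmb2q) take an i8 operand that is
      // zero-extended to i64; the word-mask flavors extend i16 to i32. The
      // extended mask value is then splatted into every destination element.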
    } else if (IsX86 && (Name == "sse.sqrt.ss" ||
                         Name == "sse2.sqrt.sd")) {
      Value *Vec = CI->getArgOperand(0);
      Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
      Function *Intr = Intrinsic::getDeclaration(F->getParent(),
                                                 Intrinsic::sqrt,
                                                 Elt0->getType());
      Elt0 = Builder.CreateCall(Intr, Elt0);
      Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
    } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
                         Name.startswith("sse2.sqrt.p") ||
                         Name.startswith("sse.sqrt.p"))) {
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                         Intrinsic::sqrt,
                                                         CI->getType()),
                               {CI->getArgOperand(0)});
    } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
      if (CI->getNumArgOperands() == 4 &&
          (!isa<ConstantInt>(CI->getArgOperand(3)) ||
           cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
        Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
                                            : Intrinsic::x86_avx512_sqrt_pd_512;

        Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
        Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                           IID), Args);
      } else {
        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                           Intrinsic::sqrt,
                                                           CI->getType()),
                                 {CI->getArgOperand(0)});
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
                         Name.startswith("avx512.ptestnm"))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      Value *Mask = CI->getArgOperand(2);
      Rep = Builder.CreateAnd(Op0, Op1);
      llvm::Type *Ty = Op0->getType();
      Value *Zero = llvm::Constant::getNullValue(Ty);
      ICmpInst::Predicate Pred =
        Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE
                                         : ICmpInst::ICMP_EQ;
      Rep = Builder.CreateICmp(Pred, Rep, Zero);
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
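      // ptestm sets a result mask bit when (Op0 & Op1) is nonzero in that
      // element; ptestnm sets it when the AND is zero. The incoming write
      // mask is then folded in by ApplyX86MaskOn1BitsVec.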
    } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))) {
      unsigned NumElts =
          CI->getArgOperand(1)->getType()->getVectorNumElements();
      Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
      unsigned NumElts = CI->getType()->getScalarSizeInBits();
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
      uint32_t Indices[64];
      for (unsigned i = 0; i != NumElts; ++i)
        Indices[i] = i;

      // First extract half of each vector. This gives better codegen than
      // doing it in a single shuffle.
      LHS = Builder.CreateShuffleVector(LHS, LHS,
                                        makeArrayRef(Indices, NumElts / 2));
      RHS = Builder.CreateShuffleVector(RHS, RHS,
                                        makeArrayRef(Indices, NumElts / 2));
      // Concat the vectors.
      // NOTE: Operands have to be swapped to match intrinsic definition.
      Rep = Builder.CreateShuffleVector(RHS, LHS,
                                        makeArrayRef(Indices, NumElts));
      Rep = Builder.CreateBitCast(Rep, CI->getType());
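      // Example: for llvm.x86.avx512.kunpck.bw, NumElts is 16. The low 8
      // elements of each mask vector are extracted and concatenated with the
      // second operand in the low positions, so the i16 result is
      // (LHS[7:0] << 8) | RHS[7:0].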
    } else if (IsX86 && Name == "avx512.kand.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      Rep = Builder.CreateAnd(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kandn.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      LHS = Builder.CreateNot(LHS);
      Rep = Builder.CreateAnd(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kor.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      Rep = Builder.CreateOr(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kxor.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      Rep = Builder.CreateXor(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kxnor.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      LHS = Builder.CreateNot(LHS);
      Rep = Builder.CreateXor(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.knot.w") {
      Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Rep = Builder.CreateNot(Rep);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 &&
               (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      Rep = Builder.CreateOr(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
      Value *C;
      if (Name[14] == 'c')
        C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
      else
        C = ConstantInt::getNullValue(Builder.getInt16Ty());
      Rep = Builder.CreateICmpEQ(Rep, C);
      Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
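      // kortestz returns 1 iff (LHS | RHS) is all zeros and kortestc returns
      // 1 iff it is all ones; the i1 compare result is zero-extended to i32
      // to match the old intrinsic's return type.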
    } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
                         Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
                         Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
                         Name == "sse.div.ss" || Name == "sse2.div.sd")) {
      Type *I32Ty = Type::getInt32Ty(C);
      Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
                                                 ConstantInt::get(I32Ty, 0));
      Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
                                                 ConstantInt::get(I32Ty, 0));
      Value *EltOp;
      if (Name.contains(".add."))
        EltOp = Builder.CreateFAdd(Elt0, Elt1);
      else if (Name.contains(".sub."))
        EltOp = Builder.CreateFSub(Elt0, Elt1);
      else if (Name.contains(".mul."))
        EltOp = Builder.CreateFMul(Elt0, Elt1);
      else
        EltOp = Builder.CreateFDiv(Elt0, Elt1);
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
                                        ConstantInt::get(I32Ty, 0));
    } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
      // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
      bool CmpEq = Name[16] == 'e';
      Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
    } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
      Type *OpTy = CI->getArgOperand(0)->getType();
      unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
      Intrinsic::ID IID;
      switch (VecWidth) {
      default: llvm_unreachable("Unexpected intrinsic");
      case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
      case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
      case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
      }

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getOperand(0), CI->getArgOperand(1) });
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
      Type *OpTy = CI->getArgOperand(0)->getType();
      unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
      unsigned EltWidth = OpTy->getScalarSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_fpclass_ps_128;
      else if (VecWidth == 256 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_fpclass_ps_256;
      else if (VecWidth == 512 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_fpclass_ps_512;
      else if (VecWidth == 128 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_fpclass_pd_128;
      else if (VecWidth == 256 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_fpclass_pd_256;
      else if (VecWidth == 512 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_fpclass_pd_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getOperand(0), CI->getArgOperand(1) });
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
      Type *OpTy = CI->getArgOperand(0)->getType();
      unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
      unsigned EltWidth = OpTy->getScalarSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_cmp_ps_128;
      else if (VecWidth == 256 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_cmp_ps_256;
      else if (VecWidth == 512 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_cmp_ps_512;
      else if (VecWidth == 128 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_cmp_pd_128;
      else if (VecWidth == 256 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_cmp_pd_256;
      else if (VecWidth == 512 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_cmp_pd_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      SmallVector<Value *, 4> Args;
      Args.push_back(CI->getArgOperand(0));
      Args.push_back(CI->getArgOperand(1));
      Args.push_back(CI->getArgOperand(2));
      if (CI->getNumArgOperands() == 5)
        Args.push_back(CI->getArgOperand(4));

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               Args);
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
    } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
               Name[16] != 'p') {
      // Integer compare intrinsics.
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
    } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
    } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
                         Name.startswith("avx512.cvtw2mask.") ||
                         Name.startswith("avx512.cvtd2mask.") ||
                         Name.startswith("avx512.cvtq2mask."))) {
      Value *Op = CI->getArgOperand(0);
      Value *Zero = llvm::Constant::getNullValue(Op->getType());
      Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
    } else if (IsX86 && (Name == "ssse3.pabs.b.128" ||
                         Name == "ssse3.pabs.w.128" ||
                         Name == "ssse3.pabs.d.128" ||
                         Name.startswith("avx2.pabs") ||
                         Name.startswith("avx512.mask.pabs"))) {
      Rep = upgradeAbs(Builder, *CI);
    } else if (IsX86 && (Name == "sse41.pmaxsb" ||
                         Name == "sse2.pmaxs.w" ||
                         Name == "sse41.pmaxsd" ||
                         Name.startswith("avx2.pmaxs") ||
                         Name.startswith("avx512.mask.pmaxs"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
    } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
                         Name == "sse41.pmaxuw" ||
                         Name == "sse41.pmaxud" ||
                         Name.startswith("avx2.pmaxu") ||
                         Name.startswith("avx512.mask.pmaxu"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
    } else if (IsX86 && (Name == "sse41.pminsb" ||
                         Name == "sse2.pmins.w" ||
                         Name == "sse41.pminsd" ||
                         Name.startswith("avx2.pmins") ||
                         Name.startswith("avx512.mask.pmins"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
    } else if (IsX86 && (Name == "sse2.pminu.b" ||
                         Name == "sse41.pminuw" ||
                         Name == "sse41.pminud" ||
                         Name.startswith("avx2.pminu") ||
                         Name.startswith("avx512.mask.pminu"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
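      // upgradeIntMinMax lowers each of these to a compare plus select, e.g.
      // sse2.pmaxs.w becomes:
      //   %c = icmp sgt <8 x i16> %a, %b
      //   %r = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b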
    } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
                         Name == "avx2.pmulu.dq" ||
                         Name == "avx512.pmulu.dq.512" ||
                         Name.startswith("avx512.mask.pmulu.dq."))) {
      Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
    } else if (IsX86 && (Name == "sse41.pmuldq" ||
                         Name == "avx2.pmul.dq" ||
                         Name == "avx512.pmul.dq.512" ||
                         Name.startswith("avx512.mask.pmul.dq."))) {
      Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
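      // pmul(u).dq multiplies the even-numbered 32-bit elements into 64-bit
      // results; upgradePMULDQ emits the equivalent IR by isolating the low
      // 32 bits of each 64-bit lane (masking for unsigned, shifting and
      // sign-extending for signed) before the multiply.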
    } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
                         Name == "sse2.cvtsi2sd" ||
                         Name == "sse.cvtsi642ss" ||
                         Name == "sse2.cvtsi642sd")) {
      Rep = Builder.CreateSIToFP(CI->getArgOperand(1),
                                 CI->getType()->getVectorElementType());
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
    } else if (IsX86 && Name == "avx512.cvtusi2sd") {
      Rep = Builder.CreateUIToFP(CI->getArgOperand(1),
                                 CI->getType()->getVectorElementType());
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
    } else if (IsX86 && Name == "sse2.cvtss2sd") {
      Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
      Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType());
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
    } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
                         Name == "sse2.cvtdq2ps" ||
                         Name == "avx.cvtdq2.pd.256" ||
                         Name == "avx.cvtdq2.ps.256" ||
                         Name.startswith("avx512.mask.cvtdq2pd.") ||
                         Name.startswith("avx512.mask.cvtudq2pd.") ||
                         Name.startswith("avx512.mask.cvtdq2ps.") ||
                         Name.startswith("avx512.mask.cvtudq2ps.") ||
                         Name.startswith("avx512.mask.cvtqq2pd.") ||
                         Name.startswith("avx512.mask.cvtuqq2pd.") ||
                         Name == "avx512.mask.cvtqq2ps.256" ||
                         Name == "avx512.mask.cvtqq2ps.512" ||
                         Name == "avx512.mask.cvtuqq2ps.256" ||
                         Name == "avx512.mask.cvtuqq2ps.512" ||
                         Name == "sse2.cvtps2pd" ||
                         Name == "avx.cvt.ps2.pd.256" ||
                         Name == "avx512.mask.cvtps2pd.128" ||
                         Name == "avx512.mask.cvtps2pd.256")) {
      Type *DstTy = CI->getType();
      Rep = CI->getArgOperand(0);
      Type *SrcTy = Rep->getType();

      unsigned NumDstElts = DstTy->getVectorNumElements();
      if (NumDstElts < SrcTy->getVectorNumElements()) {
        assert(NumDstElts == 2 && "Unexpected vector size");
        uint32_t ShuffleMask[2] = { 0, 1 };
        Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
      }

      bool IsPS2PD = SrcTy->getVectorElementType()->isFloatTy();
      bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
      if (IsPS2PD)
        Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
      else if (CI->getNumArgOperands() == 4 &&
               (!isa<ConstantInt>(CI->getArgOperand(3)) ||
                cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
        Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
                                       : Intrinsic::x86_avx512_sitofp_round;
        Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
                                                { DstTy, SrcTy });
        Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
      } else {
        Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
                         : Builder.CreateSIToFP(Rep, DstTy, "cvt");
      }

      if (CI->getNumArgOperands() >= 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
      Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
                              CI->getArgOperand(1), CI->getArgOperand(2),
                              /*Aligned*/false);
    } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
      Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
                              CI->getArgOperand(1), CI->getArgOperand(2),
                              /*Aligned*/true);
    } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
      Type *ResultTy = CI->getType();
      Type *PtrTy = ResultTy->getVectorElementType();

      // Cast the pointer to element type.
      Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
                                         llvm::PointerType::getUnqual(PtrTy));

      Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                     ResultTy->getVectorNumElements());

      Function *ELd = Intrinsic::getDeclaration(F->getParent(),
                                                Intrinsic::masked_expandload,
                                                ResultTy);
      Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
    } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
      Type *ResultTy = CI->getArgOperand(1)->getType();
      Type *PtrTy = ResultTy->getVectorElementType();

      // Cast the pointer to element type.
      Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
                                         llvm::PointerType::getUnqual(PtrTy));

      Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                     ResultTy->getVectorNumElements());

      Function *CSt = Intrinsic::getDeclaration(F->getParent(),
                                                Intrinsic::masked_compressstore,
                                                ResultTy);
      Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
    } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
                         Name.startswith("avx512.mask.expand."))) {
      Type *ResultTy = CI->getType();

      Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                     ResultTy->getVectorNumElements());

      bool IsCompress = Name[12] == 'c';
      Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
                                     : Intrinsic::x86_avx512_mask_expand;
      Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
      Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
                                       MaskVec });
    } else if (IsX86 && Name.startswith("xop.vpcom")) {
      bool IsSigned;
      if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
          Name.endswith("uq"))
        IsSigned = false;
      else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
               Name.endswith("q"))
        IsSigned = true;
      else
        llvm_unreachable("Unknown suffix");

      unsigned Imm;
      if (CI->getNumArgOperands() == 3) {
        Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      } else {
        Name = Name.substr(9); // strip off "xop.vpcom"
        if (Name.startswith("lt"))
          Imm = 0;
        else if (Name.startswith("le"))
          Imm = 1;
        else if (Name.startswith("gt"))
          Imm = 2;
        else if (Name.startswith("ge"))
          Imm = 3;
        else if (Name.startswith("eq"))
          Imm = 4;
        else if (Name.startswith("ne"))
          Imm = 5;
        else if (Name.startswith("false"))
          Imm = 6;
        else if (Name.startswith("true"))
          Imm = 7;
        else
          llvm_unreachable("Unknown condition");
      }

      Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
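      // This matches VPCOM's immediate encoding: 0=lt, 1=le, 2=gt, 3=ge,
      // 4=eq, 5=ne, 6=false (always zero), 7=true (all ones).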
    } else if (IsX86 && Name.startswith("xop.vpcmov")) {
      Value *Sel = CI->getArgOperand(2);
      Value *NotSel = Builder.CreateNot(Sel);
      Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
      Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
      Rep = Builder.CreateOr(Sel0, Sel1);
    } else if (IsX86 && (Name.startswith("xop.vprot") ||
                         Name.startswith("avx512.prol") ||
                         Name.startswith("avx512.mask.prol"))) {
      Rep = upgradeX86Rotate(Builder, *CI, false);
    } else if (IsX86 && (Name.startswith("avx512.pror") ||
                         Name.startswith("avx512.mask.pror"))) {
      Rep = upgradeX86Rotate(Builder, *CI, true);
    } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
                         Name.startswith("avx512.mask.vpshld") ||
                         Name.startswith("avx512.maskz.vpshld"))) {
      bool ZeroMask = Name[11] == 'z';
      Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
    } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
                         Name.startswith("avx512.mask.vpshrd") ||
                         Name.startswith("avx512.maskz.vpshrd"))) {
      bool ZeroMask = Name[11] == 'z';
      Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
    } else if (IsX86 && Name == "sse42.crc32.64.8") {
      Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
                                               Intrinsic::x86_sse42_crc32_32_8);
      Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0),
                                          Type::getInt32Ty(C));
      Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
      Rep = Builder.CreateZExt(Rep, CI->getType(), "");
    } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
                         Name.startswith("avx512.vbroadcast.s"))) {
      // Replace broadcasts with a series of insertelements.
      Type *VecTy = CI->getType();
      Type *EltTy = VecTy->getVectorElementType();
      unsigned EltNum = VecTy->getVectorNumElements();
      Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
                                          EltTy->getPointerTo());
      Value *Load = Builder.CreateLoad(EltTy, Cast);
      Type *I32Ty = Type::getInt32Ty(C);
      Rep = UndefValue::get(VecTy);
      for (unsigned I = 0; I < EltNum; ++I)
        Rep = Builder.CreateInsertElement(Rep, Load,
                                          ConstantInt::get(I32Ty, I));
    } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
                         Name.startswith("sse41.pmovzx") ||
                         Name.startswith("avx2.pmovsx") ||
                         Name.startswith("avx2.pmovzx") ||
                         Name.startswith("avx512.mask.pmovsx") ||
                         Name.startswith("avx512.mask.pmovzx"))) {
      VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
      VectorType *DstTy = cast<VectorType>(CI->getType());
      unsigned NumDstElts = DstTy->getNumElements();

      // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
      SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
      for (unsigned i = 0; i != NumDstElts; ++i)
        ShuffleMask[i] = i;

      Value *SV = Builder.CreateShuffleVector(
          CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);

      bool DoSext = (StringRef::npos != Name.find("pmovsx"));
      Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
                   : Builder.CreateZExt(SV, DstTy);
      // If there are 3 arguments, it's a masked intrinsic so we need a select.
      if (CI->getNumArgOperands() == 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (Name == "avx512.mask.pmov.qd.256" ||
               Name == "avx512.mask.pmov.qd.512" ||
               Name == "avx512.mask.pmov.wb.256" ||
               Name == "avx512.mask.pmov.wb.512") {
      Type *Ty = CI->getArgOperand(1)->getType();
      Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
                         Name == "avx2.vbroadcasti128")) {
      // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
      Type *EltTy = CI->getType()->getVectorElementType();
      unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
      Type *VT = VectorType::get(EltTy, NumSrcElts);
      Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                            PointerType::getUnqual(VT));
      Value *Load = Builder.CreateAlignedLoad(VT, Op, 1);
      if (NumSrcElts == 2)
        Rep = Builder.CreateShuffleVector(Load,
                                          UndefValue::get(Load->getType()),
                                          { 0, 1, 0, 1 });
      else
        Rep = Builder.CreateShuffleVector(Load,
                                          UndefValue::get(Load->getType()),
                                          { 0, 1, 2, 3, 0, 1, 2, 3 });
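      // For example, avx.vbroadcastf128.ps.256 becomes a 128-bit load of
      // <4 x float> followed by a shuffle repeating all four elements into
      // the <8 x float> result.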
    } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
                         Name.startswith("avx512.mask.shuf.f"))) {
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Type *VT = CI->getType();
      unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
      unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
      unsigned ControlBitsMask = NumLanes - 1;
      unsigned NumControlBits = NumLanes / 2;
      SmallVector<uint32_t, 8> ShuffleMask(0);

      for (unsigned l = 0; l != NumLanes; ++l) {
        unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
        // We actually need the other source.
        if (l >= NumLanes / 2)
          LaneMask += NumLanes;
        for (unsigned i = 0; i != NumElementsInLane; ++i)
          ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
      }
      Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                        CI->getArgOperand(1), ShuffleMask);
      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
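      // Example: on 512-bit vectors NumLanes is 4 and each lane control is
      // two bits of Imm, so Imm = 0x44 selects { A_lane0, A_lane1, B_lane0,
      // B_lane1 }.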
    } else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
                         Name.startswith("avx512.mask.broadcasti"))) {
      unsigned NumSrcElts =
          CI->getArgOperand(0)->getType()->getVectorNumElements();
      unsigned NumDstElts = CI->getType()->getVectorNumElements();

      SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
      for (unsigned i = 0; i != NumDstElts; ++i)
        ShuffleMask[i] = i % NumSrcElts;

      Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                        CI->getArgOperand(0),
                                        ShuffleMask);
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
                         Name.startswith("avx2.vbroadcast") ||
                         Name.startswith("avx512.pbroadcast") ||
                         Name.startswith("avx512.mask.broadcast.s"))) {
      // Replace vp?broadcasts with a vector shuffle.
      Value *Op = CI->getArgOperand(0);
      unsigned NumElts = CI->getType()->getVectorNumElements();
      Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
      Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
                                        Constant::getNullValue(MaskTy));

      if (CI->getNumArgOperands() == 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("sse2.padds.") ||
                         Name.startswith("sse2.psubs.") ||
                         Name.startswith("avx2.padds.") ||
                         Name.startswith("avx2.psubs.") ||
                         Name.startswith("avx512.padds.") ||
                         Name.startswith("avx512.psubs.") ||
                         Name.startswith("avx512.mask.padds.") ||
                         Name.startswith("avx512.mask.psubs."))) {
      bool IsAdd = Name.contains(".padds");
      Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
    } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
                         Name.startswith("sse2.psubus.") ||
                         Name.startswith("avx2.paddus.") ||
                         Name.startswith("avx2.psubus.") ||
                         Name.startswith("avx512.mask.paddus.") ||
                         Name.startswith("avx512.mask.psubus."))) {
      bool IsAdd = Name.contains(".paddus");
      Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
    } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
                                      CI->getArgOperand(1),
                                      CI->getArgOperand(2),
                                      CI->getArgOperand(3),
                                      CI->getArgOperand(4),
                                      false);
    } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
                                      CI->getArgOperand(1),
                                      CI->getArgOperand(2),
                                      CI->getArgOperand(3),
                                      CI->getArgOperand(4),
                                      true);
    } else if (IsX86 && (Name == "sse2.psll.dq" ||
                         Name == "avx2.psll.dq")) {
      // 128/256-bit shift left specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (IsX86 && (Name == "sse2.psrl.dq" ||
                         Name == "avx2.psrl.dq")) {
      // 128/256-bit shift right specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
                         Name == "avx2.psll.dq.bs" ||
                         Name == "avx512.psll.dq.512")) {
      // 128/256/512-bit shift left specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
    } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
                         Name == "avx2.psrl.dq.bs" ||
                         Name == "avx512.psrl.dq.512")) {
      // 128/256/512-bit shift right specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
    } else if (IsX86 && (Name == "sse41.pblendw" ||
                         Name.startswith("sse41.blendp") ||
                         Name.startswith("avx.blend.p") ||
                         Name == "avx2.pblendw" ||
                         Name.startswith("avx2.pblendd."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
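      // Each Imm bit picks the source for one element (a set bit selects the
      // second operand), e.g. sse41.pblendw with Imm = 0x0F yields
      // <B0, B1, B2, B3, A4, A5, A6, A7>.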
    } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
                         Name == "avx2.vinserti128" ||
                         Name.startswith("avx512.mask.insert"))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      unsigned DstNumElts = CI->getType()->getVectorNumElements();
      unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
      unsigned Scale = DstNumElts / SrcNumElts;

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm % Scale;

      // Extend the second operand into a vector the size of the destination.
      Value *UndefV = UndefValue::get(Op1->getType());
      SmallVector<uint32_t, 8> Idxs(DstNumElts);
      for (unsigned i = 0; i != SrcNumElts; ++i)
        Idxs[i] = i;
      for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
        Idxs[i] = SrcNumElts;
      Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);

      // Insert the second operand into the first operand.
      //
      // Note that there is no guarantee that instruction lowering will actually
      // produce a vinsertf128 instruction for the created shuffles. In
      // particular, the 0 immediate case involves no lane changes, so it can
      // be handled as a blend.
      //
      // Example of shuffle mask for 32-bit elements:
      // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
      // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7>

      // First fill with identity mask.
      for (unsigned i = 0; i != DstNumElts; ++i)
        Idxs[i] = i;
      // Then replace the elements where we need to insert.
      for (unsigned i = 0; i != SrcNumElts; ++i)
        Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
      Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);

      // If the intrinsic has a mask operand, handle that.
      if (CI->getNumArgOperands() == 5)
        Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                            CI->getArgOperand(3));
    } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
                         Name == "avx2.vextracti128" ||
                         Name.startswith("avx512.mask.vextract"))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned DstNumElts = CI->getType()->getVectorNumElements();
      unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
      unsigned Scale = SrcNumElts / DstNumElts;

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm % Scale;

      // Get indexes for the subvector of the input vector.
      SmallVector<uint32_t, 8> Idxs(DstNumElts);
      for (unsigned i = 0; i != DstNumElts; ++i) {
        Idxs[i] = i + (Imm * DstNumElts);
      }
      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      // If the intrinsic has a mask operand, handle that.
      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
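      // Example: avx.vextractf128.ps.256 with Imm = 1 extracts the upper
      // 128 bits: Idxs = <4, 5, 6, 7> applied to the <8 x float> source.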
    } else if (!IsX86 && Name == "stackprotectorcheck") {
      Rep = nullptr;
    } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
                         Name.startswith("avx512.mask.perm.di."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<uint32_t, 8> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
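      // The 8-bit immediate holds four 2-bit indices applied within each
      // group of four elements, e.g. Imm = 0x1B reverses each group:
      // Idxs = <3, 2, 1, 0, 7, 6, 5, 4> for a 512-bit vector.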
    } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
                         Name == "avx2.vperm2i128")) {
      // The immediate permute control byte looks like this:
      // [1:0] - select 128 bits from sources for low half of destination
      // [2]   - ignore
      // [3]   - zero low half of destination
      // [5:4] - select 128 bits from sources for high half of destination
      // [6]   - ignore
      // [7]   - zero high half of destination

      uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();

      unsigned NumElts = CI->getType()->getVectorNumElements();
      unsigned HalfSize = NumElts / 2;
      SmallVector<uint32_t, 8> ShuffleMask(NumElts);

      // Determine which operand(s) are actually in use for this instruction.
      Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
      Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);

      // If needed, replace operands based on zero mask.
      V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
      V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;

      // Permute low half of result.
      unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
      for (unsigned i = 0; i < HalfSize; ++i)
        ShuffleMask[i] = StartIndex + i;

      // Permute high half of result.
      StartIndex = (Imm & 0x10) ? HalfSize : 0;
      for (unsigned i = 0; i < HalfSize; ++i)
        ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;

      Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
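      // Example: Imm = 0x21 takes the result's low half from the high lane
      // of the first source and its high half from the low lane of the
      // second source, i.e. <A_hi, B_lo>.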
    } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
                         Name == "sse2.pshuf.d" ||
                         Name.startswith("avx512.mask.vpermil.p") ||
                         Name.startswith("avx512.mask.pshuf.d."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();
      // Calculate the size of each index in the immediate.
      unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
      unsigned IdxMask = ((1 << IdxSize) - 1);

      SmallVector<uint32_t, 8> Idxs(NumElts);
      // Lookup the bits for this element, wrapping around the immediate every
      // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
      // to offset by the first index of each group.
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
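      // Example: sse2.pshuf.d with Imm = 0x1B reverses a <4 x i32> vector:
      // each element reads a 2-bit index from Imm, so Idxs = <3, 2, 1, 0>.
      // For the vpermil.pd forms IdxSize is 1 and each element uses one bit.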
    } else if (IsX86 && (Name == "sse2.pshufl.w" ||
                         Name.startswith("avx512.mask.pshufl.w."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
        for (unsigned i = 4; i != 8; ++i)
          Idxs[i + l] = i + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
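      // Example: sse2.pshufl.w with Imm = 0x1B reverses the low four words
      // and keeps the high four in place: Idxs = <3, 2, 1, 0, 4, 5, 6, 7>.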
    } else if (IsX86 && (Name == "sse2.pshufh.w" ||
                         Name.startswith("avx512.mask.pshufh.w."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = i + l;
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
      unsigned HalfLaneElts = NumLaneElts / 2;

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        // Base index is the starting element of the lane.
        Idxs[i] = i - (i % NumLaneElts);
        // If we are half way through the lane switch to the other source.
        if ((i % NumLaneElts) >= HalfLaneElts)
          Idxs[i] += NumElts;
        // Now select the specific element. By adding HalfLaneElts bits from
        // the immediate. Wrapping around the immediate every 8-bits.
        Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
      }

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
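      // Example: shuf.ps.128 with Imm = 0x4E produces Idxs = <2, 3, 4, 5>:
      // the low half picks elements 2 and 3 of the first source, the high
      // half elements 0 and 1 of the second.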
    } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
                         Name.startswith("avx512.mask.movshdup") ||
                         Name.startswith("avx512.mask.movsldup"))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned NumElts = CI->getType()->getVectorNumElements();
      unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();

      unsigned Offset = 0;
      if (Name.startswith("avx512.mask.movshdup."))
        Offset = 1;

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += NumLaneElts)
        for (unsigned i = 0; i != NumLaneElts; i += 2) {
          Idxs[i + l + 0] = i + l + Offset;
          Idxs[i + l + 1] = i + l + Offset;
        }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
                         Name.startswith("avx512.mask.unpckl."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = CI->getType()->getVectorNumElements();
      int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();

      SmallVector<uint32_t, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
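      // The formula interleaves the low half of each 128-bit lane, e.g. for
      // 32-bit elements in one lane: Idxs = <0, NumElts, 1, NumElts + 1>,
      // i.e. <A0, B0, A1, B1>.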
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
                         Name.startswith("avx512.mask.unpckh."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = CI->getType()->getVectorNumElements();
      int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();

      SmallVector<uint32_t, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
                         Name.startswith("avx512.mask.pand."))) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
                         Name.startswith("avx512.mask.pandn."))) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
      Rep = Builder.CreateAnd(Rep,
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
                         Name.startswith("avx512.mask.por."))) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
                         Name.startswith("avx512.mask.pxor."))) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
      Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
      Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
      Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_add_ps_512;
        else
          IID = Intrinsic::x86_avx512_add_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_div_ps_512;
        else
          IID = Intrinsic::x86_avx512_div_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_mul_ps_512;
        else
          IID = Intrinsic::x86_avx512_mul_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_sub_ps_512;
        else
          IID = Intrinsic::x86_avx512_sub_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
                         Name.startswith("avx512.mask.min.p")) &&
               Name.drop_front(18) == ".512") {
      bool IsDouble = Name[17] == 'd';
      bool IsMin = Name[13] == 'i';
      static const Intrinsic::ID MinMaxTbl[2][2] = {
        { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
        { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
      };
      Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1),
                                 CI->getArgOperand(4) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                         Intrinsic::ctlz,
                                                         CI->getType()),
                               { CI->getArgOperand(0), Builder.getInt1(false) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
          IID = Intrinsic::x86_avx2_psllv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
          IID = Intrinsic::x86_avx2_psllv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
          IID = Intrinsic::x86_avx2_psllv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
          IID = Intrinsic::x86_avx2_psllv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
          IID = Intrinsic::x86_avx512_psllv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
          IID = Intrinsic::x86_avx512_psllv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
          IID = Intrinsic::x86_avx512_psllv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
                            : Intrinsic::x86_sse2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
                            : Intrinsic::x86_sse2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
                            : Intrinsic::x86_sse2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
                            : Intrinsic::x86_avx2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
                            : Intrinsic::x86_avx2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
                            : Intrinsic::x86_avx2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
                              Intrinsic::x86_avx512_psll_d_512;
        else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
                              Intrinsic::x86_avx512_psll_q_512;
        else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
                            : Intrinsic::x86_avx512_psll_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
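      // Worked example of the name decoding above (illustrative): for
      // "avx512.mask.psll.di.512", Name[16] is '.', so Size = Name[17] = 'd',
      // and Name[18] == 'i' marks the immediate form, selecting
      // x86_avx512_pslli_d_512. "avx512.mask.psllv.d.512" has Name[16] == 'v',
      // selecting the variable-shift x86_avx512_psllv_d_512 instead.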
    } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
          IID = Intrinsic::x86_avx2_psrlv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
          IID = Intrinsic::x86_avx2_psrlv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
          IID = Intrinsic::x86_avx2_psrlv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
          IID = Intrinsic::x86_avx2_psrlv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
          IID = Intrinsic::x86_avx512_psrlv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
          IID = Intrinsic::x86_avx512_psrlv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
          IID = Intrinsic::x86_avx512_psrlv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
                            : Intrinsic::x86_sse2_psrl_d;
        else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
                            : Intrinsic::x86_sse2_psrl_q;
        else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
                            : Intrinsic::x86_sse2_psrl_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
                            : Intrinsic::x86_avx2_psrl_d;
        else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
                            : Intrinsic::x86_avx2_psrl_q;
        else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
                            : Intrinsic::x86_avx2_psrl_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
                              Intrinsic::x86_avx512_psrl_d_512;
        else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
                              Intrinsic::x86_avx512_psrl_q_512;
        else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
                            : Intrinsic::x86_avx512_psrl_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
          IID = Intrinsic::x86_avx2_psrav_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
          IID = Intrinsic::x86_avx2_psrav_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
          IID = Intrinsic::x86_avx512_psrav_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
          IID = Intrinsic::x86_avx512_psrav_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
          IID = Intrinsic::x86_avx512_psrav_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
                            : Intrinsic::x86_sse2_psra_d;
        else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
                              Intrinsic::x86_avx512_psra_q_128;
        else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
                            : Intrinsic::x86_sse2_psra_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
                            : Intrinsic::x86_avx2_psra_d;
        else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
                              Intrinsic::x86_avx512_psra_q_256;
        else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
                            : Intrinsic::x86_avx2_psra_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
                              Intrinsic::x86_avx512_psra_d_512;
        else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
                              Intrinsic::x86_avx512_psra_q_512;
        else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
                            : Intrinsic::x86_avx512_psra_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
      Rep = upgradeMaskedMove(Builder, *CI);
    } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
      Rep = UpgradeMaskToInt(Builder, *CI);
    } else if (IsX86 && Name.endswith(".movntdqa")) {
      Module *M = F->getParent();
      MDNode *Node = MDNode::get(
          C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));

      Value *Ptr = CI->getArgOperand(0);
      VectorType *VTy = cast<VectorType>(CI->getType());

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC =
          Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
      LoadInst *LI = Builder.CreateAlignedLoad(VTy, BC, VTy->getBitWidth() / 8);
      LI->setMetadata(M->getMDKindID("nontemporal"), Node);
      Rep = LI;
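      // E.g. (illustrative) the 512-bit "avx512.movntdqa" becomes an ordinary
      // vector load tagged with nontemporal metadata:
      //   %r = load <8 x i64>, <8 x i64>* %cast, align 64, !nontemporal !n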
    } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
                         Name.startswith("fma.vfmsub.") ||
                         Name.startswith("fma.vfnmadd.") ||
                         Name.startswith("fma.vfnmsub."))) {
      bool NegMul = Name[6] == 'n';
      bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
      bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';

      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                       CI->getArgOperand(2) };

      if (IsScalar) {
        Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
        Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
        Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
      }

      if (NegMul && !IsScalar)
        Ops[0] = Builder.CreateFNeg(Ops[0]);
      if (NegMul && IsScalar)
        Ops[1] = Builder.CreateFNeg(Ops[1]);
      if (NegAcc)
        Ops[2] = Builder.CreateFNeg(Ops[2]);

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                         Intrinsic::fma,
                                                         Ops[0]->getType()),
                               Ops);

      if (IsScalar)
        Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
                                          (uint64_t)0);
    } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                       CI->getArgOperand(2) };

      Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
      Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
      Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                         Intrinsic::fma,
                                                         Ops[0]->getType()),
                               Ops);

      Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
                                        Rep, (uint64_t)0);
    } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
                         Name.startswith("avx512.maskz.vfmadd.s") ||
                         Name.startswith("avx512.mask3.vfmadd.s") ||
                         Name.startswith("avx512.mask3.vfmsub.s") ||
                         Name.startswith("avx512.mask3.vfnmsub.s"))) {
      bool IsMask3 = Name[11] == '3';
      bool IsMaskZ = Name[11] == 'z';
      // Drop the "avx512.mask." to make it easier.
      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
      bool NegMul = Name[2] == 'n';
      bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

      Value *A = CI->getArgOperand(0);
      Value *B = CI->getArgOperand(1);
      Value *C = CI->getArgOperand(2);

      if (NegMul && (IsMask3 || IsMaskZ))
        A = Builder.CreateFNeg(A);
      if (NegMul && !(IsMask3 || IsMaskZ))
        B = Builder.CreateFNeg(B);
      if (NegAcc)
        C = Builder.CreateFNeg(C);

      A = Builder.CreateExtractElement(A, (uint64_t)0);
      B = Builder.CreateExtractElement(B, (uint64_t)0);
      C = Builder.CreateExtractElement(C, (uint64_t)0);

      if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
          cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
        Value *Ops[] = { A, B, C, CI->getArgOperand(4) };

        Intrinsic::ID IID;
        if (Name.back() == 'd')
          IID = Intrinsic::x86_avx512_vfmadd_f64;
        else
          IID = Intrinsic::x86_avx512_vfmadd_f32;
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
        Rep = Builder.CreateCall(FMA, Ops);
      } else {
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
                                                  Intrinsic::fma,
                                                  A->getType());
        Rep = Builder.CreateCall(FMA, { A, B, C });
      }

      Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
                        IsMask3 ? C : A;

      // For Mask3 with NegAcc, we need to create a new extractelement that
      // avoids the negation above.
      if (NegAcc && IsMask3)
        PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
                                                (uint64_t)0);

      Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
                                Rep, PassThru);
      Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
                                        Rep, (uint64_t)0);
    } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
                         Name.startswith("avx512.mask.vfnmadd.p") ||
                         Name.startswith("avx512.mask.vfnmsub.p") ||
                         Name.startswith("avx512.mask3.vfmadd.p") ||
                         Name.startswith("avx512.mask3.vfmsub.p") ||
                         Name.startswith("avx512.mask3.vfnmsub.p") ||
                         Name.startswith("avx512.maskz.vfmadd.p"))) {
      bool IsMask3 = Name[11] == '3';
      bool IsMaskZ = Name[11] == 'z';
      // Drop the "avx512.mask." to make it easier.
      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
      bool NegMul = Name[2] == 'n';
      bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

      Value *A = CI->getArgOperand(0);
      Value *B = CI->getArgOperand(1);
      Value *C = CI->getArgOperand(2);

      if (NegMul && (IsMask3 || IsMaskZ))
        A = Builder.CreateFNeg(A);
      if (NegMul && !(IsMask3 || IsMaskZ))
        B = Builder.CreateFNeg(B);
      if (NegAcc)
        C = Builder.CreateFNeg(C);

      if (CI->getNumArgOperands() == 5 &&
          (!isa<ConstantInt>(CI->getArgOperand(4)) ||
           cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
        Intrinsic::ID IID;
        // Check the character before ".512" in string.
        if (Name[Name.size()-5] == 's')
          IID = Intrinsic::x86_avx512_vfmadd_ps_512;
        else
          IID = Intrinsic::x86_avx512_vfmadd_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { A, B, C, CI->getArgOperand(4) });
      } else {
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
                                                  Intrinsic::fma,
                                                  A->getType());
        Rep = Builder.CreateCall(FMA, { A, B, C });
      }

      Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
                        IsMask3 ? CI->getArgOperand(2) :
                                  CI->getArgOperand(0);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
                         Name.startswith("fma.vfmsubadd.p"))) {
      bool IsSubAdd = Name[7] == 's';
      int NumElts = CI->getType()->getVectorNumElements();

      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                       CI->getArgOperand(2) };

      Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
                                                Ops[0]->getType());
      Value *Odd = Builder.CreateCall(FMA, Ops);
      Ops[2] = Builder.CreateFNeg(Ops[2]);
      Value *Even = Builder.CreateCall(FMA, Ops);

      if (IsSubAdd)
        std::swap(Even, Odd);

      SmallVector<uint32_t, 32> Idxs(NumElts);
      for (int i = 0; i != NumElts; ++i)
        Idxs[i] = i + (i % 2) * NumElts;

      Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
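      // Illustrative shuffle for NumElts == 4: Idxs = <0, 5, 2, 7>, so even
      // result lanes come from Even (a*b-c) and odd lanes from Odd (a*b+c);
      // the IsSubAdd swap above inverts that pairing for vfmsubadd.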
    } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
                         Name.startswith("avx512.mask3.vfmaddsub.p") ||
                         Name.startswith("avx512.maskz.vfmaddsub.p") ||
                         Name.startswith("avx512.mask3.vfmsubadd.p"))) {
      bool IsMask3 = Name[11] == '3';
      bool IsMaskZ = Name[11] == 'z';
      // Drop the "avx512.mask." to make it easier.
      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
      bool IsSubAdd = Name[3] == 's';
      if (CI->getNumArgOperands() == 5 &&
          (!isa<ConstantInt>(CI->getArgOperand(4)) ||
           cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
        Intrinsic::ID IID;
        // Check the character before ".512" in string.
        if (Name[Name.size()-5] == 's')
          IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
        else
          IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;

        Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                         CI->getArgOperand(2), CI->getArgOperand(4) };
        if (IsSubAdd)
          Ops[2] = Builder.CreateFNeg(Ops[2]);

        // Pass the (possibly negated) Ops so the vfmsubadd forms keep their
        // negated accumulator.
        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 Ops);
      } else {
        int NumElts = CI->getType()->getVectorNumElements();

        Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                         CI->getArgOperand(2) };

        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
                                                  Intrinsic::fma,
                                                  Ops[0]->getType());
        Value *Odd = Builder.CreateCall(FMA, Ops);
        Ops[2] = Builder.CreateFNeg(Ops[2]);
        Value *Even = Builder.CreateCall(FMA, Ops);

        if (IsSubAdd)
          std::swap(Even, Odd);

        SmallVector<uint32_t, 32> Idxs(NumElts);
        for (int i = 0; i != NumElts; ++i)
          Idxs[i] = i + (i % 2) * NumElts;

        Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
      }

      Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
                        IsMask3 ? CI->getArgOperand(2) :
                                  CI->getArgOperand(0);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
                         Name.startswith("avx512.maskz.pternlog."))) {
      bool ZeroMask = Name[11] == 'z';
      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
      unsigned EltWidth = CI->getType()->getScalarSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_pternlog_d_128;
      else if (VecWidth == 256 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_pternlog_d_256;
      else if (VecWidth == 512 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_pternlog_d_512;
      else if (VecWidth == 128 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_pternlog_q_128;
      else if (VecWidth == 256 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_pternlog_q_256;
      else if (VecWidth == 512 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_pternlog_q_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                        CI->getArgOperand(2), CI->getArgOperand(3) };
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                               Args);
      Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                                 : CI->getArgOperand(0);
      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
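      // The imm8 forwarded as Args[3] is a three-input truth table; for
      // instance (illustrative), 0xF8 computes A | (B & C) per bit position.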
    } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
                         Name.startswith("avx512.maskz.vpmadd52"))) {
      bool ZeroMask = Name[11] == 'z';
      bool High = Name[20] == 'h' || Name[21] == 'h';
      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && !High)
        IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
      else if (VecWidth == 256 && !High)
        IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
      else if (VecWidth == 512 && !High)
        IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
      else if (VecWidth == 128 && High)
        IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
      else if (VecWidth == 256 && High)
        IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
      else if (VecWidth == 512 && High)
        IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                        CI->getArgOperand(2) };
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                               Args);
      Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                                 : CI->getArgOperand(0);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
                         Name.startswith("avx512.mask.vpermt2var.") ||
                         Name.startswith("avx512.maskz.vpermt2var."))) {
      bool ZeroMask = Name[11] == 'z';
      bool IndexForm = Name[17] == 'i';
      Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
    } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
                         Name.startswith("avx512.maskz.vpdpbusd.") ||
                         Name.startswith("avx512.mask.vpdpbusds.") ||
                         Name.startswith("avx512.maskz.vpdpbusds."))) {
      bool ZeroMask = Name[11] == 'z';
      bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && !IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpbusd_128;
      else if (VecWidth == 256 && !IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpbusd_256;
      else if (VecWidth == 512 && !IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpbusd_512;
      else if (VecWidth == 128 && IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpbusds_128;
      else if (VecWidth == 256 && IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpbusds_256;
      else if (VecWidth == 512 && IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpbusds_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                        CI->getArgOperand(2) };
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                               Args);
      Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                                 : CI->getArgOperand(0);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
                         Name.startswith("avx512.maskz.vpdpwssd.") ||
                         Name.startswith("avx512.mask.vpdpwssds.") ||
                         Name.startswith("avx512.maskz.vpdpwssds."))) {
      bool ZeroMask = Name[11] == 'z';
      bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && !IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpwssd_128;
      else if (VecWidth == 256 && !IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpwssd_256;
      else if (VecWidth == 512 && !IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpwssd_512;
      else if (VecWidth == 128 && IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpwssds_128;
      else if (VecWidth == 256 && IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpwssds_256;
      else if (VecWidth == 512 && IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpwssds_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                        CI->getArgOperand(2) };
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                               Args);
      Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                                 : CI->getArgOperand(0);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
                         Name == "addcarry.u32" || Name == "addcarry.u64" ||
                         Name == "subborrow.u32" || Name == "subborrow.u64")) {
      Intrinsic::ID IID;
      if (Name[0] == 'a' && Name.back() == '2')
        IID = Intrinsic::x86_addcarry_32;
      else if (Name[0] == 'a' && Name.back() == '4')
        IID = Intrinsic::x86_addcarry_64;
      else if (Name[0] == 's' && Name.back() == '2')
        IID = Intrinsic::x86_subborrow_32;
      else if (Name[0] == 's' && Name.back() == '4')
        IID = Intrinsic::x86_subborrow_64;
      else
        llvm_unreachable("Unexpected intrinsic");

      // Make a call with 3 operands.
      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                        CI->getArgOperand(2) };
      Value *NewCall = Builder.CreateCall(
                                Intrinsic::getDeclaration(CI->getModule(), IID),
                                Args);

      // Extract the second result and store it.
      Value *Data = Builder.CreateExtractValue(NewCall, 1);
      // Cast the pointer to the right type.
      Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
                                 llvm::PointerType::getUnqual(Data->getType()));
      Builder.CreateAlignedStore(Data, Ptr, 1);
      // Replace the original call result with the first result of the new call.
      Value *CF = Builder.CreateExtractValue(NewCall, 0);

      CI->replaceAllUsesWith(CF);
      Rep = nullptr;
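      // E.g. (illustrative) the old form
      //   %c = call i8 @llvm.x86.addcarry.u32(i8 %cf, i32 %a, i32 %b, i8* %p)
      // becomes a { i8, i32 } call to @llvm.x86.addcarry.32 plus an explicit
      // store of the second result through %p.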
    } else if (IsX86 && Name.startswith("avx512.mask.") &&
               upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
      // Rep will be updated by the call in the condition.
    } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
      Value *Arg = CI->getArgOperand(0);
      Value *Neg = Builder.CreateNeg(Arg, "neg");
      Value *Cmp = Builder.CreateICmpSGE(
          Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
      Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
    } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
                          Name == "max.ui" || Name == "max.ull")) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
                       ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
                       : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
    } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
                          Name == "min.ui" || Name == "min.ull")) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
                       ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
                       : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
    } else if (IsNVVM && Name == "clz.ll") {
      // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
      Value *Arg = CI->getArgOperand(0);
      Value *Ctlz = Builder.CreateCall(
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                    {Arg->getType()}),
          {Arg, Builder.getFalse()}, "ctlz");
      Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
    } else if (IsNVVM && Name == "popc.ll") {
      // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
      Value *Arg = CI->getArgOperand(0);
      Value *Popc = Builder.CreateCall(
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                    {Arg->getType()}),
          Arg, "ctpop");
      Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
    } else if (IsNVVM && Name == "h2f") {
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(
                                   F->getParent(), Intrinsic::convert_from_fp16,
                                   {Builder.getFloatTy()}),
                               CI->getArgOperand(0), "h2f");
    } else {
      llvm_unreachable("Unknown function for CallInst upgrade.");
    }

    if (Rep)
      CI->replaceAllUsesWith(Rep);
    CI->eraseFromParent();
    return;
  }
  const auto &DefaultCase = [&NewFn, &CI]() -> void {
    // Handle generic mangling change, but nothing else
    assert(
        (CI->getCalledFunction()->getName() != NewFn->getName()) &&
        "Unknown function for CallInst upgrade and isn't just a name change");
    CI->setCalledFunction(NewFn);
  };
  CallInst *NewCall = nullptr;
  switch (NewFn->getIntrinsicID()) {
  default: {
    DefaultCase();
    return;
  }
  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::bitreverse:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    assert(CI->getNumArgOperands() == 1 &&
           "Mismatch between function args and call args");
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
    break;
  case Intrinsic::objectsize: {
    Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
                                   ? Builder.getFalse()
                                   : CI->getArgOperand(2);
    Value *Dynamic =
        CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
    NewCall = Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize,
                Dynamic});
    break;
  }
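  // E.g. (illustrative) a two-argument call
  //   call i64 @llvm.objectsize.i64.p0i8(i8* %p, i1 false)
  // is upgraded to the four-argument form with nullunknown = false and
  // dynamic = false filled in as defaults.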
  case Intrinsic::ctpop:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::convert_from_fp16:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;
  case Intrinsic::dbg_value:
    // Upgrade from the old version that had an extra offset argument.
    assert(CI->getNumArgOperands() == 4);
    // Drop nonzero offsets instead of attempting to upgrade them.
    if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
      if (Offset->isZeroValue()) {
        NewCall = Builder.CreateCall(
            NewFn,
            {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
        break;
      }
    CI->eraseFromParent();
    return;
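  // The old form handled above carried an offset, e.g. (illustrative):
  //   call void @llvm.dbg.value(metadata i32 %v, i64 0, metadata !10,
  //                             metadata !11)
  // Only zero offsets are carried over to the three-argument form.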
  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
    break;

  case Intrinsic::x86_xop_vpermil2pd:
  case Intrinsic::x86_xop_vpermil2ps:
  case Intrinsic::x86_xop_vpermil2pd_256:
  case Intrinsic::x86_xop_vpermil2ps_256: {
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
    VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
      return;

    // Old intrinsic, add bitcasts
    Value *Arg1 = CI->getArgOperand(1);

    Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);

    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

    NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
    break;
  }
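  // E.g. (illustrative):
  //   %r = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %b)
  // is rebuilt with both operands bitcast to <2 x i64> first.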
  case Intrinsic::x86_rdtscp: {
    // This used to take 1 argument. If we have no arguments, it is already
    // upgraded.
    if (CI->getNumOperands() == 0)
      return;

    NewCall = Builder.CreateCall(NewFn);
    // Extract the second result and store it.
    Value *Data = Builder.CreateExtractValue(NewCall, 1);
    // Cast the pointer to the right type.
    Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
                                 llvm::PointerType::getUnqual(Data->getType()));
    Builder.CreateAlignedStore(Data, Ptr, 1);
    // Replace the original call result with the first result of the new call.
    Value *TSC = Builder.CreateExtractValue(NewCall, 0);

    std::string Name = CI->getName();
    if (!Name.empty()) {
      CI->setName(Name + ".old");
      NewCall->setName(Name);
    }
    CI->replaceAllUsesWith(TSC);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument models
    // an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());

    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::thread_pointer: {
    NewCall = Builder.CreateCall(NewFn, {});
    break;
  }

  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::masked_load:
  case Intrinsic::masked_store:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_scatter: {
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset: {
    // We have to make sure that the call signature is what we're expecting.
    // We only want to change the old signatures by removing the alignment arg:
    //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|64], i32, i1)
    //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|64], i1)
    //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
    //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
    // Note: i8*'s in the above can be any pointer type
    if (CI->getNumArgOperands() != 5) {
      DefaultCase();
      return;
    }
    // Remove alignment argument (3), and add alignment attributes to the
    // dest/src pointers.
    Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
                      CI->getArgOperand(2), CI->getArgOperand(4)};
    NewCall = Builder.CreateCall(NewFn, Args);
    auto *MemCI = cast<MemIntrinsic>(NewCall);
    // All mem intrinsics support dest alignment.
    const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
    MemCI->setDestAlignment(Align->getZExtValue());
    // Memcpy/Memmove also support source alignment.
    if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
      MTI->setSourceAlignment(Align->getZExtValue());
    break;
  }
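  // E.g. (illustrative):
  //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n,
  //                                        i32 4, i1 false)
  // becomes
  //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %d, i8* align 4 %s,
  //                                        i64 %n, i1 false)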
  }
  assert(NewCall && "Should have either set this variable or returned through "
                    "the default case");
  std::string Name = CI->getName();
  if (!Name.empty()) {
    CI->setName(Name + ".old");
    NewCall->setName(Name);
  }
  CI->replaceAllUsesWith(NewCall);
  CI->eraseFromParent();
}
void llvm::UpgradeCallsToIntrinsic(Function *F) {
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");

  // Check if this function should be upgraded and get the replacement function
  // if there is one.
  Function *NewFn;
  if (UpgradeIntrinsicFunction(F, NewFn)) {
    // Replace all users of the old function with the new function or new
    // instructions. This is not a range loop because the call is deleted.
    for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
      if (CallInst *CI = dyn_cast<CallInst>(*UI++))
        UpgradeIntrinsicCall(CI, NewFn);

    // Remove old function, no longer used, from the module.
    F->eraseFromParent();
  }
}
MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
  // Check if the tag uses struct-path aware TBAA format.
  if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
    return &MD;

  auto &Context = MD.getContext();
  if (MD.getNumOperands() == 3) {
    Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
    MDNode *ScalarType = MDNode::get(Context, Elts);
    // Create a MDNode <ScalarType, ScalarType, offset 0, const>
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(
                             Constant::getNullValue(Type::getInt64Ty(Context))),
                         MD.getOperand(2)};
    return MDNode::get(Context, Elts2);
  }

  // Create a MDNode <MD, MD, offset 0>
  Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
                                    Type::getInt64Ty(Context)))};
  return MDNode::get(Context, Elts);
}
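// E.g. (illustrative) an old scalar TBAA tag !1 = !{!"int", !0} is rewritten
// to the struct-path form !{!1, !1, i64 0}, where !1 is the original scalar
// node reused as both base and access type.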
Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
                                      Instruction *&Temp) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Temp = nullptr;
  Type *SrcTy = V->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = V->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);
    Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);

    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
  }

  return nullptr;
}
Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Type *SrcTy = C->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = C->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);
    return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
                                     DestTy);
  }

  return nullptr;
}
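// Both helpers above rewrite an address-space-changing bitcast, e.g.
// (illustrative) i8* -> i8 addrspace(1)*, as a ptrtoint to i64 followed by an
// inttoptr, since a plain bitcast between address spaces is not valid IR.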
/// Check the debug info version number, if it is out-dated, drop the debug
/// info. Return true if module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  unsigned Version = getDebugMetadataVersionFromModule(M);
  if (Version == DEBUG_METADATA_VERSION) {
    bool BrokenDebugInfo = false;
    if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
      report_fatal_error("Broken module found, compilation aborted!");
    if (!BrokenDebugInfo)
      // Everything is ok.
      return false;
    else {
      // Diagnose malformed debug info.
      DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
      M.getContext().diagnose(Diag);
    }
  }
  bool Modified = StripDebugInfo(M);
  if (Modified && Version != DEBUG_METADATA_VERSION) {
    // Diagnose a version mismatch.
    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DiagVersion);
  }
  return Modified;
}
bool llvm::UpgradeRetainReleaseMarker(Module &M) {
  bool Changed = false;
  const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
  NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
  if (ModRetainReleaseMarker) {
    MDNode *Op = ModRetainReleaseMarker->getOperand(0);
    if (Op) {
      MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
      if (ID) {
        SmallVector<StringRef, 4> ValueComp;
        ID->getString().split(ValueComp, "#");
        if (ValueComp.size() == 2) {
          std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
          ID = MDString::get(M.getContext(), NewValue);
        }
        M.addModuleFlag(Module::Error, MarkerKey, ID);
        M.eraseNamedMetadata(ModRetainReleaseMarker);
        Changed = true;
      }
    }
  }
  return Changed;
}
bool llvm::UpgradeModuleFlags(Module &M) {
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(I);
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
    // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
    // fields was Error and is now Max.
    if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
        if (Behavior->getLimitedValue() == Module::Error) {
          Type *Int32Ty = Type::getInt32Ty(M.getContext());
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
              MDString::get(M.getContext(), ID->getString()),
              Op->getOperand(2)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
    // Upgrade Objective-C Image Info Section. Remove the whitespace in the
    // section name so that llvm-lto will not complain about mismatching
    // module flags that are functionally the same.
    if (ID->getString() == "Objective-C Image Info Section") {
      if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
        SmallVector<StringRef, 4> ValueComp;
        Value->getString().split(ValueComp, " ");
        if (ValueComp.size() != 1) {
          std::string NewValue;
          for (auto &S : ValueComp)
            NewValue += S.str();
          Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
                              MDString::get(M.getContext(), NewValue)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
  }

  // "Objective-C Class Properties" is recently added for Objective-C. We
  // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
  // flag of value 0, so we can correctly downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
                    (uint32_t)0);
    Changed = true;
  }

  return Changed;
}
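// E.g. (illustrative, assuming the usual ModFlagBehavior encoding where
// Error is 1 and Max is 7) the flag !{i32 1, !"PIC Level", i32 2} is
// rewritten above to !{i32 7, !"PIC Level", i32 2}.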
void llvm::UpgradeSectionAttributes(Module &M) {
  auto TrimSpaces = [](StringRef Section) -> std::string {
    SmallVector<StringRef, 5> Components;
    Section.split(Components, ',');

    SmallString<32> Buffer;
    raw_svector_ostream OS(Buffer);

    for (auto Component : Components)
      OS << ',' << Component.trim();

    return OS.str().substr(1);
  };

  for (auto &GV : M.globals()) {
    if (!GV.hasSection())
      continue;

    StringRef Section = GV.getSection();

    if (!Section.startswith("__DATA, __objc_catlist"))
      continue;

    // __DATA, __objc_catlist, regular, no_dead_strip
    //   -> __DATA,__objc_catlist,regular,no_dead_strip
    GV.setSection(TrimSpaces(Section));
  }
}
static bool isOldLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return false;
  if (T->getNumOperands() < 1)
    return false;
  auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!S)
    return false;
  return S->getString().startswith("llvm.vectorizer.");
}
static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
  StringRef OldPrefix = "llvm.vectorizer.";
  assert(OldTag.startswith(OldPrefix) && "Expected old prefix");

  if (OldTag == "llvm.vectorizer.unroll")
    return MDString::get(C, "llvm.loop.interleave.count");

  return MDString::get(
      C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
             .str());
}
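// E.g. (illustrative) the tag "llvm.vectorizer.width" becomes
// "llvm.loop.vectorize.width", while the special case
// "llvm.vectorizer.unroll" maps to "llvm.loop.interleave.count".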
static Metadata *upgradeLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return MD;
  if (T->getNumOperands() < 1)
    return MD;
  auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!OldTag)
    return MD;
  if (!OldTag->getString().startswith("llvm.vectorizer."))
    return MD;

  // This has an old tag.  Upgrade it.
  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
  for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
    Ops.push_back(T->getOperand(I));

  return MDTuple::get(T->getContext(), Ops);
}
MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
  auto *T = dyn_cast<MDTuple>(&N);
  if (!T)
    return &N;

  if (none_of(T->operands(), isOldLoopArgument))
    return &N;

  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  for (Metadata *MD : T->operands())
    Ops.push_back(upgradeLoopArgument(MD));

  return MDTuple::get(T->getContext(), Ops);
}