1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/DIBuilder.h"
19 #include "llvm/IR/DebugInfo.h"
20 #include "llvm/IR/DiagnosticInfo.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/Instruction.h"
24 #include "llvm/IR/IntrinsicInst.h"
25 #include "llvm/IR/LLVMContext.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/IR/Verifier.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include "llvm/Support/Regex.h"
33 static void rename(GlobalValue
*GV
) { GV
->setName(GV
->getName() + ".old"); }
35 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
36 // changed their type from v4f32 to v2i64.
37 static bool UpgradePTESTIntrinsic(Function
* F
, Intrinsic::ID IID
,
39 // Check whether this is an old version of the function, which received
41 Type
*Arg0Type
= F
->getFunctionType()->getParamType(0);
42 if (Arg0Type
!= VectorType::get(Type::getFloatTy(F
->getContext()), 4))
45 // Yes, it's old, replace it with new version.
47 NewFn
= Intrinsic::getDeclaration(F
->getParent(), IID
);
51 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
52 // arguments have changed their type from i32 to i8.
53 static bool UpgradeX86IntrinsicsWith8BitMask(Function
*F
, Intrinsic::ID IID
,
55 // Check that the last argument is an i32.
56 Type
*LastArgType
= F
->getFunctionType()->getParamType(
57 F
->getFunctionType()->getNumParams() - 1);
58 if (!LastArgType
->isIntegerTy(32))
61 // Move this function aside and map down.
63 NewFn
= Intrinsic::getDeclaration(F
->getParent(), IID
);
67 static bool ShouldUpgradeX86Intrinsic(Function
*F
, StringRef Name
) {
68 // All of the intrinsics matches below should be marked with which llvm
69 // version started autoupgrading them. At some point in the future we would
70 // like to use this information to remove upgrade code for some older
71 // intrinsics. It is currently undecided how we will determine that future
73 if (Name
== "addcarryx.u32" || // Added in 8.0
74 Name
== "addcarryx.u64" || // Added in 8.0
75 Name
== "addcarry.u32" || // Added in 8.0
76 Name
== "addcarry.u64" || // Added in 8.0
77 Name
== "subborrow.u32" || // Added in 8.0
78 Name
== "subborrow.u64" || // Added in 8.0
79 Name
.startswith("sse2.padds.") || // Added in 8.0
80 Name
.startswith("sse2.psubs.") || // Added in 8.0
81 Name
.startswith("sse2.paddus.") || // Added in 8.0
82 Name
.startswith("sse2.psubus.") || // Added in 8.0
83 Name
.startswith("avx2.padds.") || // Added in 8.0
84 Name
.startswith("avx2.psubs.") || // Added in 8.0
85 Name
.startswith("avx2.paddus.") || // Added in 8.0
86 Name
.startswith("avx2.psubus.") || // Added in 8.0
87 Name
.startswith("avx512.padds.") || // Added in 8.0
88 Name
.startswith("avx512.psubs.") || // Added in 8.0
89 Name
.startswith("avx512.mask.padds.") || // Added in 8.0
90 Name
.startswith("avx512.mask.psubs.") || // Added in 8.0
91 Name
.startswith("avx512.mask.paddus.") || // Added in 8.0
92 Name
.startswith("avx512.mask.psubus.") || // Added in 8.0
93 Name
=="ssse3.pabs.b.128" || // Added in 6.0
94 Name
=="ssse3.pabs.w.128" || // Added in 6.0
95 Name
=="ssse3.pabs.d.128" || // Added in 6.0
96 Name
.startswith("fma4.vfmadd.s") || // Added in 7.0
97 Name
.startswith("fma.vfmadd.") || // Added in 7.0
98 Name
.startswith("fma.vfmsub.") || // Added in 7.0
99 Name
.startswith("fma.vfmaddsub.") || // Added in 7.0
100 Name
.startswith("fma.vfmsubadd.") || // Added in 7.0
101 Name
.startswith("fma.vfnmadd.") || // Added in 7.0
102 Name
.startswith("fma.vfnmsub.") || // Added in 7.0
103 Name
.startswith("avx512.mask.vfmadd.") || // Added in 7.0
104 Name
.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
105 Name
.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
106 Name
.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
107 Name
.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
108 Name
.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
109 Name
.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
110 Name
.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
111 Name
.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
112 Name
.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
113 Name
.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
114 Name
.startswith("avx512.mask.shuf.i") || // Added in 6.0
115 Name
.startswith("avx512.mask.shuf.f") || // Added in 6.0
116 Name
.startswith("avx512.kunpck") || //added in 6.0
117 Name
.startswith("avx2.pabs.") || // Added in 6.0
118 Name
.startswith("avx512.mask.pabs.") || // Added in 6.0
119 Name
.startswith("avx512.broadcastm") || // Added in 6.0
120 Name
== "sse.sqrt.ss" || // Added in 7.0
121 Name
== "sse2.sqrt.sd" || // Added in 7.0
122 Name
.startswith("avx512.mask.sqrt.p") || // Added in 7.0
123 Name
.startswith("avx.sqrt.p") || // Added in 7.0
124 Name
.startswith("sse2.sqrt.p") || // Added in 7.0
125 Name
.startswith("sse.sqrt.p") || // Added in 7.0
126 Name
.startswith("avx512.mask.pbroadcast") || // Added in 6.0
127 Name
.startswith("sse2.pcmpeq.") || // Added in 3.1
128 Name
.startswith("sse2.pcmpgt.") || // Added in 3.1
129 Name
.startswith("avx2.pcmpeq.") || // Added in 3.1
130 Name
.startswith("avx2.pcmpgt.") || // Added in 3.1
131 Name
.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
132 Name
.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
133 Name
.startswith("avx.vperm2f128.") || // Added in 6.0
134 Name
== "avx2.vperm2i128" || // Added in 6.0
135 Name
== "sse.add.ss" || // Added in 4.0
136 Name
== "sse2.add.sd" || // Added in 4.0
137 Name
== "sse.sub.ss" || // Added in 4.0
138 Name
== "sse2.sub.sd" || // Added in 4.0
139 Name
== "sse.mul.ss" || // Added in 4.0
140 Name
== "sse2.mul.sd" || // Added in 4.0
141 Name
== "sse.div.ss" || // Added in 4.0
142 Name
== "sse2.div.sd" || // Added in 4.0
143 Name
== "sse41.pmaxsb" || // Added in 3.9
144 Name
== "sse2.pmaxs.w" || // Added in 3.9
145 Name
== "sse41.pmaxsd" || // Added in 3.9
146 Name
== "sse2.pmaxu.b" || // Added in 3.9
147 Name
== "sse41.pmaxuw" || // Added in 3.9
148 Name
== "sse41.pmaxud" || // Added in 3.9
149 Name
== "sse41.pminsb" || // Added in 3.9
150 Name
== "sse2.pmins.w" || // Added in 3.9
151 Name
== "sse41.pminsd" || // Added in 3.9
152 Name
== "sse2.pminu.b" || // Added in 3.9
153 Name
== "sse41.pminuw" || // Added in 3.9
154 Name
== "sse41.pminud" || // Added in 3.9
155 Name
== "avx512.kand.w" || // Added in 7.0
156 Name
== "avx512.kandn.w" || // Added in 7.0
157 Name
== "avx512.knot.w" || // Added in 7.0
158 Name
== "avx512.kor.w" || // Added in 7.0
159 Name
== "avx512.kxor.w" || // Added in 7.0
160 Name
== "avx512.kxnor.w" || // Added in 7.0
161 Name
== "avx512.kortestc.w" || // Added in 7.0
162 Name
== "avx512.kortestz.w" || // Added in 7.0
163 Name
.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
164 Name
.startswith("avx2.pmax") || // Added in 3.9
165 Name
.startswith("avx2.pmin") || // Added in 3.9
166 Name
.startswith("avx512.mask.pmax") || // Added in 4.0
167 Name
.startswith("avx512.mask.pmin") || // Added in 4.0
168 Name
.startswith("avx2.vbroadcast") || // Added in 3.8
169 Name
.startswith("avx2.pbroadcast") || // Added in 3.8
170 Name
.startswith("avx.vpermil.") || // Added in 3.1
171 Name
.startswith("sse2.pshuf") || // Added in 3.9
172 Name
.startswith("avx512.pbroadcast") || // Added in 3.9
173 Name
.startswith("avx512.mask.broadcast.s") || // Added in 3.9
174 Name
.startswith("avx512.mask.movddup") || // Added in 3.9
175 Name
.startswith("avx512.mask.movshdup") || // Added in 3.9
176 Name
.startswith("avx512.mask.movsldup") || // Added in 3.9
177 Name
.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
178 Name
.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
179 Name
.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
180 Name
.startswith("avx512.mask.shuf.p") || // Added in 4.0
181 Name
.startswith("avx512.mask.vpermil.p") || // Added in 3.9
182 Name
.startswith("avx512.mask.perm.df.") || // Added in 3.9
183 Name
.startswith("avx512.mask.perm.di.") || // Added in 3.9
184 Name
.startswith("avx512.mask.punpckl") || // Added in 3.9
185 Name
.startswith("avx512.mask.punpckh") || // Added in 3.9
186 Name
.startswith("avx512.mask.unpckl.") || // Added in 3.9
187 Name
.startswith("avx512.mask.unpckh.") || // Added in 3.9
188 Name
.startswith("avx512.mask.pand.") || // Added in 3.9
189 Name
.startswith("avx512.mask.pandn.") || // Added in 3.9
190 Name
.startswith("avx512.mask.por.") || // Added in 3.9
191 Name
.startswith("avx512.mask.pxor.") || // Added in 3.9
192 Name
.startswith("avx512.mask.and.") || // Added in 3.9
193 Name
.startswith("avx512.mask.andn.") || // Added in 3.9
194 Name
.startswith("avx512.mask.or.") || // Added in 3.9
195 Name
.startswith("avx512.mask.xor.") || // Added in 3.9
196 Name
.startswith("avx512.mask.padd.") || // Added in 4.0
197 Name
.startswith("avx512.mask.psub.") || // Added in 4.0
198 Name
.startswith("avx512.mask.pmull.") || // Added in 4.0
199 Name
.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
200 Name
.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
201 Name
.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
202 Name
.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
203 Name
.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
204 Name
.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
205 Name
== "avx512.mask.cvtqq2ps.256" || // Added in 9.0
206 Name
== "avx512.mask.cvtqq2ps.512" || // Added in 9.0
207 Name
== "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
208 Name
== "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
209 Name
== "avx512.mask.cvtpd2dq.256" || // Added in 7.0
210 Name
== "avx512.mask.cvtpd2ps.256" || // Added in 7.0
211 Name
== "avx512.mask.cvttpd2dq.256" || // Added in 7.0
212 Name
== "avx512.mask.cvttps2dq.128" || // Added in 7.0
213 Name
== "avx512.mask.cvttps2dq.256" || // Added in 7.0
214 Name
== "avx512.mask.cvtps2pd.128" || // Added in 7.0
215 Name
== "avx512.mask.cvtps2pd.256" || // Added in 7.0
216 Name
== "avx512.cvtusi2sd" || // Added in 7.0
217 Name
.startswith("avx512.mask.permvar.") || // Added in 7.0
218 Name
== "sse2.pmulu.dq" || // Added in 7.0
219 Name
== "sse41.pmuldq" || // Added in 7.0
220 Name
== "avx2.pmulu.dq" || // Added in 7.0
221 Name
== "avx2.pmul.dq" || // Added in 7.0
222 Name
== "avx512.pmulu.dq.512" || // Added in 7.0
223 Name
== "avx512.pmul.dq.512" || // Added in 7.0
224 Name
.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
225 Name
.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
226 Name
.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
227 Name
.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
228 Name
.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
229 Name
.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
230 Name
.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
231 Name
.startswith("avx512.mask.packsswb.") || // Added in 5.0
232 Name
.startswith("avx512.mask.packssdw.") || // Added in 5.0
233 Name
.startswith("avx512.mask.packuswb.") || // Added in 5.0
234 Name
.startswith("avx512.mask.packusdw.") || // Added in 5.0
235 Name
.startswith("avx512.mask.cmp.b") || // Added in 5.0
236 Name
.startswith("avx512.mask.cmp.d") || // Added in 5.0
237 Name
.startswith("avx512.mask.cmp.q") || // Added in 5.0
238 Name
.startswith("avx512.mask.cmp.w") || // Added in 5.0
239 Name
.startswith("avx512.mask.cmp.p") || // Added in 7.0
240 Name
.startswith("avx512.mask.ucmp.") || // Added in 5.0
241 Name
.startswith("avx512.cvtb2mask.") || // Added in 7.0
242 Name
.startswith("avx512.cvtw2mask.") || // Added in 7.0
243 Name
.startswith("avx512.cvtd2mask.") || // Added in 7.0
244 Name
.startswith("avx512.cvtq2mask.") || // Added in 7.0
245 Name
.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
246 Name
.startswith("avx512.mask.psll.d") || // Added in 4.0
247 Name
.startswith("avx512.mask.psll.q") || // Added in 4.0
248 Name
.startswith("avx512.mask.psll.w") || // Added in 4.0
249 Name
.startswith("avx512.mask.psra.d") || // Added in 4.0
250 Name
.startswith("avx512.mask.psra.q") || // Added in 4.0
251 Name
.startswith("avx512.mask.psra.w") || // Added in 4.0
252 Name
.startswith("avx512.mask.psrl.d") || // Added in 4.0
253 Name
.startswith("avx512.mask.psrl.q") || // Added in 4.0
254 Name
.startswith("avx512.mask.psrl.w") || // Added in 4.0
255 Name
.startswith("avx512.mask.pslli") || // Added in 4.0
256 Name
.startswith("avx512.mask.psrai") || // Added in 4.0
257 Name
.startswith("avx512.mask.psrli") || // Added in 4.0
258 Name
.startswith("avx512.mask.psllv") || // Added in 4.0
259 Name
.startswith("avx512.mask.psrav") || // Added in 4.0
260 Name
.startswith("avx512.mask.psrlv") || // Added in 4.0
261 Name
.startswith("sse41.pmovsx") || // Added in 3.8
262 Name
.startswith("sse41.pmovzx") || // Added in 3.9
263 Name
.startswith("avx2.pmovsx") || // Added in 3.9
264 Name
.startswith("avx2.pmovzx") || // Added in 3.9
265 Name
.startswith("avx512.mask.pmovsx") || // Added in 4.0
266 Name
.startswith("avx512.mask.pmovzx") || // Added in 4.0
267 Name
.startswith("avx512.mask.lzcnt.") || // Added in 5.0
268 Name
.startswith("avx512.mask.pternlog.") || // Added in 7.0
269 Name
.startswith("avx512.maskz.pternlog.") || // Added in 7.0
270 Name
.startswith("avx512.mask.vpmadd52") || // Added in 7.0
271 Name
.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
272 Name
.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
273 Name
.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
274 Name
.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
275 Name
.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
276 Name
.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
277 Name
.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
278 Name
.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
279 Name
.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
280 Name
.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
281 Name
.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
282 Name
.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
283 Name
.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
284 Name
.startswith("avx512.mask.vpshld.") || // Added in 7.0
285 Name
.startswith("avx512.mask.vpshrd.") || // Added in 7.0
286 Name
.startswith("avx512.mask.vpshldv.") || // Added in 8.0
287 Name
.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
288 Name
.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
289 Name
.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
290 Name
.startswith("avx512.vpshld.") || // Added in 8.0
291 Name
.startswith("avx512.vpshrd.") || // Added in 8.0
292 Name
.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
293 Name
.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
294 Name
.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
295 Name
.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
296 Name
.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
297 Name
.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
298 Name
.startswith("avx512.mask.fpclass.p") || // Added in 7.0
299 Name
.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
300 Name
.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
301 Name
.startswith("avx512.mask.conflict.") || // Added in 9.0
302 Name
== "avx512.mask.pmov.qd.256" || // Added in 9.0
303 Name
== "avx512.mask.pmov.qd.512" || // Added in 9.0
304 Name
== "avx512.mask.pmov.wb.256" || // Added in 9.0
305 Name
== "avx512.mask.pmov.wb.512" || // Added in 9.0
306 Name
== "sse.cvtsi2ss" || // Added in 7.0
307 Name
== "sse.cvtsi642ss" || // Added in 7.0
308 Name
== "sse2.cvtsi2sd" || // Added in 7.0
309 Name
== "sse2.cvtsi642sd" || // Added in 7.0
310 Name
== "sse2.cvtss2sd" || // Added in 7.0
311 Name
== "sse2.cvtdq2pd" || // Added in 3.9
312 Name
== "sse2.cvtdq2ps" || // Added in 7.0
313 Name
== "sse2.cvtps2pd" || // Added in 3.9
314 Name
== "avx.cvtdq2.pd.256" || // Added in 3.9
315 Name
== "avx.cvtdq2.ps.256" || // Added in 7.0
316 Name
== "avx.cvt.ps2.pd.256" || // Added in 3.9
317 Name
.startswith("avx.vinsertf128.") || // Added in 3.7
318 Name
== "avx2.vinserti128" || // Added in 3.7
319 Name
.startswith("avx512.mask.insert") || // Added in 4.0
320 Name
.startswith("avx.vextractf128.") || // Added in 3.7
321 Name
== "avx2.vextracti128" || // Added in 3.7
322 Name
.startswith("avx512.mask.vextract") || // Added in 4.0
323 Name
.startswith("sse4a.movnt.") || // Added in 3.9
324 Name
.startswith("avx.movnt.") || // Added in 3.2
325 Name
.startswith("avx512.storent.") || // Added in 3.9
326 Name
== "sse41.movntdqa" || // Added in 5.0
327 Name
== "avx2.movntdqa" || // Added in 5.0
328 Name
== "avx512.movntdqa" || // Added in 5.0
329 Name
== "sse2.storel.dq" || // Added in 3.9
330 Name
.startswith("sse.storeu.") || // Added in 3.9
331 Name
.startswith("sse2.storeu.") || // Added in 3.9
332 Name
.startswith("avx.storeu.") || // Added in 3.9
333 Name
.startswith("avx512.mask.storeu.") || // Added in 3.9
334 Name
.startswith("avx512.mask.store.p") || // Added in 3.9
335 Name
.startswith("avx512.mask.store.b.") || // Added in 3.9
336 Name
.startswith("avx512.mask.store.w.") || // Added in 3.9
337 Name
.startswith("avx512.mask.store.d.") || // Added in 3.9
338 Name
.startswith("avx512.mask.store.q.") || // Added in 3.9
339 Name
== "avx512.mask.store.ss" || // Added in 7.0
340 Name
.startswith("avx512.mask.loadu.") || // Added in 3.9
341 Name
.startswith("avx512.mask.load.") || // Added in 3.9
342 Name
.startswith("avx512.mask.expand.load.") || // Added in 7.0
343 Name
.startswith("avx512.mask.compress.store.") || // Added in 7.0
344 Name
.startswith("avx512.mask.expand.b") || // Added in 9.0
345 Name
.startswith("avx512.mask.expand.w") || // Added in 9.0
346 Name
.startswith("avx512.mask.expand.d") || // Added in 9.0
347 Name
.startswith("avx512.mask.expand.q") || // Added in 9.0
348 Name
.startswith("avx512.mask.expand.p") || // Added in 9.0
349 Name
.startswith("avx512.mask.compress.b") || // Added in 9.0
350 Name
.startswith("avx512.mask.compress.w") || // Added in 9.0
351 Name
.startswith("avx512.mask.compress.d") || // Added in 9.0
352 Name
.startswith("avx512.mask.compress.q") || // Added in 9.0
353 Name
.startswith("avx512.mask.compress.p") || // Added in 9.0
354 Name
== "sse42.crc32.64.8" || // Added in 3.4
355 Name
.startswith("avx.vbroadcast.s") || // Added in 3.5
356 Name
.startswith("avx512.vbroadcast.s") || // Added in 7.0
357 Name
.startswith("avx512.mask.palignr.") || // Added in 3.9
358 Name
.startswith("avx512.mask.valign.") || // Added in 4.0
359 Name
.startswith("sse2.psll.dq") || // Added in 3.7
360 Name
.startswith("sse2.psrl.dq") || // Added in 3.7
361 Name
.startswith("avx2.psll.dq") || // Added in 3.7
362 Name
.startswith("avx2.psrl.dq") || // Added in 3.7
363 Name
.startswith("avx512.psll.dq") || // Added in 3.9
364 Name
.startswith("avx512.psrl.dq") || // Added in 3.9
365 Name
== "sse41.pblendw" || // Added in 3.7
366 Name
.startswith("sse41.blendp") || // Added in 3.7
367 Name
.startswith("avx.blend.p") || // Added in 3.7
368 Name
== "avx2.pblendw" || // Added in 3.7
369 Name
.startswith("avx2.pblendd.") || // Added in 3.7
370 Name
.startswith("avx.vbroadcastf128") || // Added in 4.0
371 Name
== "avx2.vbroadcasti128" || // Added in 3.7
372 Name
.startswith("avx512.mask.broadcastf") || // Added in 6.0
373 Name
.startswith("avx512.mask.broadcasti") || // Added in 6.0
374 Name
== "xop.vpcmov" || // Added in 3.8
375 Name
== "xop.vpcmov.256" || // Added in 5.0
376 Name
.startswith("avx512.mask.move.s") || // Added in 4.0
377 Name
.startswith("avx512.cvtmask2") || // Added in 5.0
378 Name
.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
379 Name
.startswith("xop.vprot") || // Added in 8.0
380 Name
.startswith("avx512.prol") || // Added in 8.0
381 Name
.startswith("avx512.pror") || // Added in 8.0
382 Name
.startswith("avx512.mask.prorv.") || // Added in 8.0
383 Name
.startswith("avx512.mask.pror.") || // Added in 8.0
384 Name
.startswith("avx512.mask.prolv.") || // Added in 8.0
385 Name
.startswith("avx512.mask.prol.") || // Added in 8.0
386 Name
.startswith("avx512.ptestm") || //Added in 6.0
387 Name
.startswith("avx512.ptestnm") || //Added in 6.0
388 Name
.startswith("avx512.mask.pavg")) // Added in 6.0
394 static bool UpgradeX86IntrinsicFunction(Function
*F
, StringRef Name
,
396 // Only handle intrinsics that start with "x86.".
397 if (!Name
.startswith("x86."))
399 // Remove "x86." prefix.
400 Name
= Name
.substr(4);
402 if (ShouldUpgradeX86Intrinsic(F
, Name
)) {
407 if (Name
== "rdtscp") { // Added in 8.0
408 // If this intrinsic has 0 operands, it's the new version.
409 if (F
->getFunctionType()->getNumParams() == 0)
413 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
414 Intrinsic::x86_rdtscp
);
418 // SSE4.1 ptest functions may have an old signature.
419 if (Name
.startswith("sse41.ptest")) { // Added in 3.2
420 if (Name
.substr(11) == "c")
421 return UpgradePTESTIntrinsic(F
, Intrinsic::x86_sse41_ptestc
, NewFn
);
422 if (Name
.substr(11) == "z")
423 return UpgradePTESTIntrinsic(F
, Intrinsic::x86_sse41_ptestz
, NewFn
);
424 if (Name
.substr(11) == "nzc")
425 return UpgradePTESTIntrinsic(F
, Intrinsic::x86_sse41_ptestnzc
, NewFn
);
427 // Several blend and other instructions with masks used the wrong number of
429 if (Name
== "sse41.insertps") // Added in 3.6
430 return UpgradeX86IntrinsicsWith8BitMask(F
, Intrinsic::x86_sse41_insertps
,
432 if (Name
== "sse41.dppd") // Added in 3.6
433 return UpgradeX86IntrinsicsWith8BitMask(F
, Intrinsic::x86_sse41_dppd
,
435 if (Name
== "sse41.dpps") // Added in 3.6
436 return UpgradeX86IntrinsicsWith8BitMask(F
, Intrinsic::x86_sse41_dpps
,
438 if (Name
== "sse41.mpsadbw") // Added in 3.6
439 return UpgradeX86IntrinsicsWith8BitMask(F
, Intrinsic::x86_sse41_mpsadbw
,
441 if (Name
== "avx.dp.ps.256") // Added in 3.6
442 return UpgradeX86IntrinsicsWith8BitMask(F
, Intrinsic::x86_avx_dp_ps_256
,
444 if (Name
== "avx2.mpsadbw") // Added in 3.6
445 return UpgradeX86IntrinsicsWith8BitMask(F
, Intrinsic::x86_avx2_mpsadbw
,
448 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
449 if (Name
.startswith("xop.vfrcz.ss") && F
->arg_size() == 2) {
451 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
452 Intrinsic::x86_xop_vfrcz_ss
);
455 if (Name
.startswith("xop.vfrcz.sd") && F
->arg_size() == 2) {
457 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
458 Intrinsic::x86_xop_vfrcz_sd
);
461 // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
462 if (Name
.startswith("xop.vpermil2")) { // Added in 3.9
463 auto Idx
= F
->getFunctionType()->getParamType(2);
464 if (Idx
->isFPOrFPVectorTy()) {
466 unsigned IdxSize
= Idx
->getPrimitiveSizeInBits();
467 unsigned EltSize
= Idx
->getScalarSizeInBits();
468 Intrinsic::ID Permil2ID
;
469 if (EltSize
== 64 && IdxSize
== 128)
470 Permil2ID
= Intrinsic::x86_xop_vpermil2pd
;
471 else if (EltSize
== 32 && IdxSize
== 128)
472 Permil2ID
= Intrinsic::x86_xop_vpermil2ps
;
473 else if (EltSize
== 64 && IdxSize
== 256)
474 Permil2ID
= Intrinsic::x86_xop_vpermil2pd_256
;
476 Permil2ID
= Intrinsic::x86_xop_vpermil2ps_256
;
477 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Permil2ID
);
482 if (Name
== "seh.recoverfp") {
483 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::eh_recoverfp
);
490 static bool UpgradeIntrinsicFunction1(Function
*F
, Function
*&NewFn
) {
491 assert(F
&& "Illegal to upgrade a non-existent Function.");
493 // Quickly eliminate it, if it's not a candidate.
494 StringRef Name
= F
->getName();
495 if (Name
.size() <= 8 || !Name
.startswith("llvm."))
497 Name
= Name
.substr(5); // Strip off "llvm."
502 if (Name
.startswith("arm.rbit") || Name
.startswith("aarch64.rbit")) {
503 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::bitreverse
,
504 F
->arg_begin()->getType());
507 if (Name
.startswith("arm.neon.vclz")) {
509 F
->arg_begin()->getType(),
510 Type::getInt1Ty(F
->getContext())
512 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
513 // the end of the name. Change name from llvm.arm.neon.vclz.* to
515 FunctionType
* fType
= FunctionType::get(F
->getReturnType(), args
, false);
516 NewFn
= Function::Create(fType
, F
->getLinkage(), F
->getAddressSpace(),
517 "llvm.ctlz." + Name
.substr(14), F
->getParent());
520 if (Name
.startswith("arm.neon.vcnt")) {
521 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::ctpop
,
522 F
->arg_begin()->getType());
525 Regex
vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
526 if (vldRegex
.match(Name
)) {
527 auto fArgs
= F
->getFunctionType()->params();
528 SmallVector
<Type
*, 4> Tys(fArgs
.begin(), fArgs
.end());
529 // Can't use Intrinsic::getDeclaration here as the return types might
530 // then only be structurally equal.
531 FunctionType
* fType
= FunctionType::get(F
->getReturnType(), Tys
, false);
532 NewFn
= Function::Create(fType
, F
->getLinkage(), F
->getAddressSpace(),
533 "llvm." + Name
+ ".p0i8", F
->getParent());
536 Regex
vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
537 if (vstRegex
.match(Name
)) {
538 static const Intrinsic::ID StoreInts
[] = {Intrinsic::arm_neon_vst1
,
539 Intrinsic::arm_neon_vst2
,
540 Intrinsic::arm_neon_vst3
,
541 Intrinsic::arm_neon_vst4
};
543 static const Intrinsic::ID StoreLaneInts
[] = {
544 Intrinsic::arm_neon_vst2lane
, Intrinsic::arm_neon_vst3lane
,
545 Intrinsic::arm_neon_vst4lane
548 auto fArgs
= F
->getFunctionType()->params();
549 Type
*Tys
[] = {fArgs
[0], fArgs
[1]};
550 if (Name
.find("lane") == StringRef::npos
)
551 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
552 StoreInts
[fArgs
.size() - 3], Tys
);
554 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
555 StoreLaneInts
[fArgs
.size() - 5], Tys
);
558 if (Name
== "aarch64.thread.pointer" || Name
== "arm.thread.pointer") {
559 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::thread_pointer
);
562 if (Name
.startswith("aarch64.neon.addp")) {
563 if (F
->arg_size() != 2)
564 break; // Invalid IR.
565 auto fArgs
= F
->getFunctionType()->params();
566 VectorType
*ArgTy
= dyn_cast
<VectorType
>(fArgs
[0]);
567 if (ArgTy
&& ArgTy
->getElementType()->isFloatingPointTy()) {
568 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
569 Intrinsic::aarch64_neon_faddp
, fArgs
);
577 if (Name
.startswith("ctlz.") && F
->arg_size() == 1) {
579 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::ctlz
,
580 F
->arg_begin()->getType());
583 if (Name
.startswith("cttz.") && F
->arg_size() == 1) {
585 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::cttz
,
586 F
->arg_begin()->getType());
592 if (Name
== "dbg.value" && F
->arg_size() == 4) {
594 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::dbg_value
);
600 SmallVector
<StringRef
, 2> Groups
;
601 Regex
R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+");
602 if (R
.match(Name
, &Groups
)) {
603 Intrinsic::ID ID
= Intrinsic::not_intrinsic
;
604 if (Groups
[1] == "fadd")
605 ID
= Intrinsic::experimental_vector_reduce_v2_fadd
;
606 if (Groups
[1] == "fmul")
607 ID
= Intrinsic::experimental_vector_reduce_v2_fmul
;
609 if (ID
!= Intrinsic::not_intrinsic
) {
611 auto Args
= F
->getFunctionType()->params();
612 Type
*Tys
[] = {F
->getFunctionType()->getReturnType(), Args
[1]};
613 NewFn
= Intrinsic::getDeclaration(F
->getParent(), ID
, Tys
);
621 bool IsLifetimeStart
= Name
.startswith("lifetime.start");
622 if (IsLifetimeStart
|| Name
.startswith("invariant.start")) {
623 Intrinsic::ID ID
= IsLifetimeStart
?
624 Intrinsic::lifetime_start
: Intrinsic::invariant_start
;
625 auto Args
= F
->getFunctionType()->params();
626 Type
* ObjectPtr
[1] = {Args
[1]};
627 if (F
->getName() != Intrinsic::getName(ID
, ObjectPtr
)) {
629 NewFn
= Intrinsic::getDeclaration(F
->getParent(), ID
, ObjectPtr
);
634 bool IsLifetimeEnd
= Name
.startswith("lifetime.end");
635 if (IsLifetimeEnd
|| Name
.startswith("invariant.end")) {
636 Intrinsic::ID ID
= IsLifetimeEnd
?
637 Intrinsic::lifetime_end
: Intrinsic::invariant_end
;
639 auto Args
= F
->getFunctionType()->params();
640 Type
* ObjectPtr
[1] = {Args
[IsLifetimeEnd
? 1 : 2]};
641 if (F
->getName() != Intrinsic::getName(ID
, ObjectPtr
)) {
643 NewFn
= Intrinsic::getDeclaration(F
->getParent(), ID
, ObjectPtr
);
647 if (Name
.startswith("invariant.group.barrier")) {
648 // Rename invariant.group.barrier to launder.invariant.group
649 auto Args
= F
->getFunctionType()->params();
650 Type
* ObjectPtr
[1] = {Args
[0]};
652 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
653 Intrinsic::launder_invariant_group
, ObjectPtr
);
661 if (Name
.startswith("masked.load.")) {
662 Type
*Tys
[] = { F
->getReturnType(), F
->arg_begin()->getType() };
663 if (F
->getName() != Intrinsic::getName(Intrinsic::masked_load
, Tys
)) {
665 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
666 Intrinsic::masked_load
,
671 if (Name
.startswith("masked.store.")) {
672 auto Args
= F
->getFunctionType()->params();
673 Type
*Tys
[] = { Args
[0], Args
[1] };
674 if (F
->getName() != Intrinsic::getName(Intrinsic::masked_store
, Tys
)) {
676 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
677 Intrinsic::masked_store
,
682 // Renaming gather/scatter intrinsics with no address space overloading
683 // to the new overload which includes an address space
684 if (Name
.startswith("masked.gather.")) {
685 Type
*Tys
[] = {F
->getReturnType(), F
->arg_begin()->getType()};
686 if (F
->getName() != Intrinsic::getName(Intrinsic::masked_gather
, Tys
)) {
688 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
689 Intrinsic::masked_gather
, Tys
);
693 if (Name
.startswith("masked.scatter.")) {
694 auto Args
= F
->getFunctionType()->params();
695 Type
*Tys
[] = {Args
[0], Args
[1]};
696 if (F
->getName() != Intrinsic::getName(Intrinsic::masked_scatter
, Tys
)) {
698 NewFn
= Intrinsic::getDeclaration(F
->getParent(),
699 Intrinsic::masked_scatter
, Tys
);
703 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
704 // alignment parameter to embedding the alignment as an attribute of
706 if (Name
.startswith("memcpy.") && F
->arg_size() == 5) {
708 // Get the types of dest, src, and len
709 ArrayRef
<Type
*> ParamTypes
= F
->getFunctionType()->params().slice(0, 3);
710 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::memcpy
,
714 if (Name
.startswith("memmove.") && F
->arg_size() == 5) {
716 // Get the types of dest, src, and len
717 ArrayRef
<Type
*> ParamTypes
= F
->getFunctionType()->params().slice(0, 3);
718 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::memmove
,
722 if (Name
.startswith("memset.") && F
->arg_size() == 5) {
724 // Get the types of dest, and len
725 const auto *FT
= F
->getFunctionType();
726 Type
*ParamTypes
[2] = {
727 FT
->getParamType(0), // Dest
728 FT
->getParamType(2) // len
730 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::memset
,
737 if (Name
.startswith("nvvm.")) {
738 Name
= Name
.substr(5);
740 // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
741 Intrinsic::ID IID
= StringSwitch
<Intrinsic::ID
>(Name
)
742 .Cases("brev32", "brev64", Intrinsic::bitreverse
)
743 .Case("clz.i", Intrinsic::ctlz
)
744 .Case("popc.i", Intrinsic::ctpop
)
745 .Default(Intrinsic::not_intrinsic
);
746 if (IID
!= Intrinsic::not_intrinsic
&& F
->arg_size() == 1) {
747 NewFn
= Intrinsic::getDeclaration(F
->getParent(), IID
,
748 {F
->getReturnType()});
752 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
753 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
755 // TODO: We could add lohi.i2d.
756 bool Expand
= StringSwitch
<bool>(Name
)
757 .Cases("abs.i", "abs.ll", true)
758 .Cases("clz.ll", "popc.ll", "h2f", true)
759 .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
760 .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
761 .StartsWith("atomic.load.add.f32.p", true)
762 .StartsWith("atomic.load.add.f64.p", true)
772 // We only need to change the name to match the mangling including the
774 if (Name
.startswith("objectsize.")) {
775 Type
*Tys
[2] = { F
->getReturnType(), F
->arg_begin()->getType() };
776 if (F
->arg_size() == 2 || F
->arg_size() == 3 ||
777 F
->getName() != Intrinsic::getName(Intrinsic::objectsize
, Tys
)) {
779 NewFn
= Intrinsic::getDeclaration(F
->getParent(), Intrinsic::objectsize
,
787 if (Name
== "prefetch") {
788 // Handle address space overloading.
789 Type
*Tys
[] = {F
->arg_begin()->getType()};
790 if (F
->getName() != Intrinsic::getName(Intrinsic::prefetch
, Tys
)) {
793 Intrinsic::getDeclaration(F
->getParent(), Intrinsic::prefetch
, Tys
);
800 if (Name
== "stackprotectorcheck") {
807 if (UpgradeX86IntrinsicFunction(F
, Name
, NewFn
))
810 // Remangle our intrinsic since we upgrade the mangling
811 auto Result
= llvm::Intrinsic::remangleIntrinsicFunction(F
);
812 if (Result
!= None
) {
813 NewFn
= Result
.getValue();
817 // This may not belong here. This function is effectively being overloaded
818 // to both detect an intrinsic which needs upgrading, and to provide the
819 // upgraded form of the intrinsic. We should perhaps have two separate
820 // functions for this.
824 bool llvm::UpgradeIntrinsicFunction(Function
*F
, Function
*&NewFn
) {
826 bool Upgraded
= UpgradeIntrinsicFunction1(F
, NewFn
);
827 assert(F
!= NewFn
&& "Intrinsic function upgraded to the same function");
829 // Upgrade intrinsic attributes. This does not change the function.
832 if (Intrinsic::ID id
= F
->getIntrinsicID())
833 F
->setAttributes(Intrinsic::getAttributes(F
->getContext(), id
));
837 GlobalVariable
*llvm::UpgradeGlobalVariable(GlobalVariable
*GV
) {
838 if (!(GV
->hasName() && (GV
->getName() == "llvm.global_ctors" ||
839 GV
->getName() == "llvm.global_dtors")) ||
840 !GV
->hasInitializer())
842 ArrayType
*ATy
= dyn_cast
<ArrayType
>(GV
->getValueType());
845 StructType
*STy
= dyn_cast
<StructType
>(ATy
->getElementType());
846 if (!STy
|| STy
->getNumElements() != 2)
849 LLVMContext
&C
= GV
->getContext();
851 auto EltTy
= StructType::get(STy
->getElementType(0), STy
->getElementType(1),
853 Constant
*Init
= GV
->getInitializer();
854 unsigned N
= Init
->getNumOperands();
855 std::vector
<Constant
*> NewCtors(N
);
856 for (unsigned i
= 0; i
!= N
; ++i
) {
857 auto Ctor
= cast
<Constant
>(Init
->getOperand(i
));
858 NewCtors
[i
] = ConstantStruct::get(
859 EltTy
, Ctor
->getAggregateElement(0u), Ctor
->getAggregateElement(1),
860 Constant::getNullValue(IRB
.getInt8PtrTy()));
862 Constant
*NewInit
= ConstantArray::get(ArrayType::get(EltTy
, N
), NewCtors
);
864 return new GlobalVariable(NewInit
->getType(), false, GV
->getLinkage(),
865 NewInit
, GV
->getName());
868 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
870 static Value
*UpgradeX86PSLLDQIntrinsics(IRBuilder
<> &Builder
,
871 Value
*Op
, unsigned Shift
) {
872 Type
*ResultTy
= Op
->getType();
873 unsigned NumElts
= ResultTy
->getVectorNumElements() * 8;
875 // Bitcast from a 64-bit element type to a byte element type.
876 Type
*VecTy
= VectorType::get(Builder
.getInt8Ty(), NumElts
);
877 Op
= Builder
.CreateBitCast(Op
, VecTy
, "cast");
879 // We'll be shuffling in zeroes.
880 Value
*Res
= Constant::getNullValue(VecTy
);
882 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
883 // we'll just return the zero vector.
886 // 256/512-bit version is split into 2/4 16-byte lanes.
887 for (unsigned l
= 0; l
!= NumElts
; l
+= 16)
888 for (unsigned i
= 0; i
!= 16; ++i
) {
889 unsigned Idx
= NumElts
+ i
- Shift
;
891 Idx
-= NumElts
- 16; // end of lane, switch operand.
892 Idxs
[l
+ i
] = Idx
+ l
;
895 Res
= Builder
.CreateShuffleVector(Res
, Op
, makeArrayRef(Idxs
, NumElts
));
898 // Bitcast back to a 64-bit element type.
899 return Builder
.CreateBitCast(Res
, ResultTy
, "cast");
902 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
904 static Value
*UpgradeX86PSRLDQIntrinsics(IRBuilder
<> &Builder
, Value
*Op
,
906 Type
*ResultTy
= Op
->getType();
907 unsigned NumElts
= ResultTy
->getVectorNumElements() * 8;
909 // Bitcast from a 64-bit element type to a byte element type.
910 Type
*VecTy
= VectorType::get(Builder
.getInt8Ty(), NumElts
);
911 Op
= Builder
.CreateBitCast(Op
, VecTy
, "cast");
913 // We'll be shuffling in zeroes.
914 Value
*Res
= Constant::getNullValue(VecTy
);
916 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
917 // we'll just return the zero vector.
920 // 256/512-bit version is split into 2/4 16-byte lanes.
921 for (unsigned l
= 0; l
!= NumElts
; l
+= 16)
922 for (unsigned i
= 0; i
!= 16; ++i
) {
923 unsigned Idx
= i
+ Shift
;
925 Idx
+= NumElts
- 16; // end of lane, switch operand.
926 Idxs
[l
+ i
] = Idx
+ l
;
929 Res
= Builder
.CreateShuffleVector(Op
, Res
, makeArrayRef(Idxs
, NumElts
));
932 // Bitcast back to a 64-bit element type.
933 return Builder
.CreateBitCast(Res
, ResultTy
, "cast");
936 static Value
*getX86MaskVec(IRBuilder
<> &Builder
, Value
*Mask
,
938 llvm::VectorType
*MaskTy
= llvm::VectorType::get(Builder
.getInt1Ty(),
939 cast
<IntegerType
>(Mask
->getType())->getBitWidth());
940 Mask
= Builder
.CreateBitCast(Mask
, MaskTy
);
942 // If we have less than 8 elements, then the starting mask was an i8 and
943 // we need to extract down to the right number of elements.
946 for (unsigned i
= 0; i
!= NumElts
; ++i
)
948 Mask
= Builder
.CreateShuffleVector(Mask
, Mask
,
949 makeArrayRef(Indices
, NumElts
),
956 static Value
*EmitX86Select(IRBuilder
<> &Builder
, Value
*Mask
,
957 Value
*Op0
, Value
*Op1
) {
958 // If the mask is all ones just emit the first operation.
959 if (const auto *C
= dyn_cast
<Constant
>(Mask
))
960 if (C
->isAllOnesValue())
963 Mask
= getX86MaskVec(Builder
, Mask
, Op0
->getType()->getVectorNumElements());
964 return Builder
.CreateSelect(Mask
, Op0
, Op1
);
967 static Value
*EmitX86ScalarSelect(IRBuilder
<> &Builder
, Value
*Mask
,
968 Value
*Op0
, Value
*Op1
) {
969 // If the mask is all ones just emit the first operation.
970 if (const auto *C
= dyn_cast
<Constant
>(Mask
))
971 if (C
->isAllOnesValue())
974 llvm::VectorType
*MaskTy
=
975 llvm::VectorType::get(Builder
.getInt1Ty(),
976 Mask
->getType()->getIntegerBitWidth());
977 Mask
= Builder
.CreateBitCast(Mask
, MaskTy
);
978 Mask
= Builder
.CreateExtractElement(Mask
, (uint64_t)0);
979 return Builder
.CreateSelect(Mask
, Op0
, Op1
);
982 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
983 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
984 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
985 static Value
*UpgradeX86ALIGNIntrinsics(IRBuilder
<> &Builder
, Value
*Op0
,
986 Value
*Op1
, Value
*Shift
,
987 Value
*Passthru
, Value
*Mask
,
989 unsigned ShiftVal
= cast
<llvm::ConstantInt
>(Shift
)->getZExtValue();
991 unsigned NumElts
= Op0
->getType()->getVectorNumElements();
992 assert((IsVALIGN
|| NumElts
% 16 == 0) && "Illegal NumElts for PALIGNR!");
993 assert((!IsVALIGN
|| NumElts
<= 16) && "NumElts too large for VALIGN!");
994 assert(isPowerOf2_32(NumElts
) && "NumElts not a power of 2!");
996 // Mask the immediate for VALIGN.
998 ShiftVal
&= (NumElts
- 1);
1000 // If palignr is shifting the pair of vectors more than the size of two
1001 // lanes, emit zero.
1003 return llvm::Constant::getNullValue(Op0
->getType());
1005 // If palignr is shifting the pair of input vectors more than one lane,
1006 // but less than two lanes, convert to shifting in zeroes.
1007 if (ShiftVal
> 16) {
1010 Op0
= llvm::Constant::getNullValue(Op0
->getType());
1013 uint32_t Indices
[64];
1014 // 256-bit palignr operates on 128-bit lanes so we need to handle that
1015 for (unsigned l
= 0; l
< NumElts
; l
+= 16) {
1016 for (unsigned i
= 0; i
!= 16; ++i
) {
1017 unsigned Idx
= ShiftVal
+ i
;
1018 if (!IsVALIGN
&& Idx
>= 16) // Disable wrap for VALIGN.
1019 Idx
+= NumElts
- 16; // End of lane, switch operand.
1020 Indices
[l
+ i
] = Idx
+ l
;
1024 Value
*Align
= Builder
.CreateShuffleVector(Op1
, Op0
,
1025 makeArrayRef(Indices
, NumElts
),
1028 return EmitX86Select(Builder
, Mask
, Align
, Passthru
);
1031 static Value
*UpgradeX86VPERMT2Intrinsics(IRBuilder
<> &Builder
, CallInst
&CI
,
1032 bool ZeroMask
, bool IndexForm
) {
1033 Type
*Ty
= CI
.getType();
1034 unsigned VecWidth
= Ty
->getPrimitiveSizeInBits();
1035 unsigned EltWidth
= Ty
->getScalarSizeInBits();
1036 bool IsFloat
= Ty
->isFPOrFPVectorTy();
1038 if (VecWidth
== 128 && EltWidth
== 32 && IsFloat
)
1039 IID
= Intrinsic::x86_avx512_vpermi2var_ps_128
;
1040 else if (VecWidth
== 128 && EltWidth
== 32 && !IsFloat
)
1041 IID
= Intrinsic::x86_avx512_vpermi2var_d_128
;
1042 else if (VecWidth
== 128 && EltWidth
== 64 && IsFloat
)
1043 IID
= Intrinsic::x86_avx512_vpermi2var_pd_128
;
1044 else if (VecWidth
== 128 && EltWidth
== 64 && !IsFloat
)
1045 IID
= Intrinsic::x86_avx512_vpermi2var_q_128
;
1046 else if (VecWidth
== 256 && EltWidth
== 32 && IsFloat
)
1047 IID
= Intrinsic::x86_avx512_vpermi2var_ps_256
;
1048 else if (VecWidth
== 256 && EltWidth
== 32 && !IsFloat
)
1049 IID
= Intrinsic::x86_avx512_vpermi2var_d_256
;
1050 else if (VecWidth
== 256 && EltWidth
== 64 && IsFloat
)
1051 IID
= Intrinsic::x86_avx512_vpermi2var_pd_256
;
1052 else if (VecWidth
== 256 && EltWidth
== 64 && !IsFloat
)
1053 IID
= Intrinsic::x86_avx512_vpermi2var_q_256
;
1054 else if (VecWidth
== 512 && EltWidth
== 32 && IsFloat
)
1055 IID
= Intrinsic::x86_avx512_vpermi2var_ps_512
;
1056 else if (VecWidth
== 512 && EltWidth
== 32 && !IsFloat
)
1057 IID
= Intrinsic::x86_avx512_vpermi2var_d_512
;
1058 else if (VecWidth
== 512 && EltWidth
== 64 && IsFloat
)
1059 IID
= Intrinsic::x86_avx512_vpermi2var_pd_512
;
1060 else if (VecWidth
== 512 && EltWidth
== 64 && !IsFloat
)
1061 IID
= Intrinsic::x86_avx512_vpermi2var_q_512
;
1062 else if (VecWidth
== 128 && EltWidth
== 16)
1063 IID
= Intrinsic::x86_avx512_vpermi2var_hi_128
;
1064 else if (VecWidth
== 256 && EltWidth
== 16)
1065 IID
= Intrinsic::x86_avx512_vpermi2var_hi_256
;
1066 else if (VecWidth
== 512 && EltWidth
== 16)
1067 IID
= Intrinsic::x86_avx512_vpermi2var_hi_512
;
1068 else if (VecWidth
== 128 && EltWidth
== 8)
1069 IID
= Intrinsic::x86_avx512_vpermi2var_qi_128
;
1070 else if (VecWidth
== 256 && EltWidth
== 8)
1071 IID
= Intrinsic::x86_avx512_vpermi2var_qi_256
;
1072 else if (VecWidth
== 512 && EltWidth
== 8)
1073 IID
= Intrinsic::x86_avx512_vpermi2var_qi_512
;
1075 llvm_unreachable("Unexpected intrinsic");
1077 Value
*Args
[] = { CI
.getArgOperand(0) , CI
.getArgOperand(1),
1078 CI
.getArgOperand(2) };
1080 // If this isn't index form we need to swap operand 0 and 1.
1082 std::swap(Args
[0], Args
[1]);
1084 Value
*V
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
.getModule(), IID
),
1086 Value
*PassThru
= ZeroMask
? ConstantAggregateZero::get(Ty
)
1087 : Builder
.CreateBitCast(CI
.getArgOperand(1),
1089 return EmitX86Select(Builder
, CI
.getArgOperand(3), V
, PassThru
);
1092 static Value
*UpgradeX86AddSubSatIntrinsics(IRBuilder
<> &Builder
, CallInst
&CI
,
1093 bool IsSigned
, bool IsAddition
) {
1094 Type
*Ty
= CI
.getType();
1095 Value
*Op0
= CI
.getOperand(0);
1096 Value
*Op1
= CI
.getOperand(1);
1099 IsSigned
? (IsAddition
? Intrinsic::sadd_sat
: Intrinsic::ssub_sat
)
1100 : (IsAddition
? Intrinsic::uadd_sat
: Intrinsic::usub_sat
);
1101 Function
*Intrin
= Intrinsic::getDeclaration(CI
.getModule(), IID
, Ty
);
1102 Value
*Res
= Builder
.CreateCall(Intrin
, {Op0
, Op1
});
1104 if (CI
.getNumArgOperands() == 4) { // For masked intrinsics.
1105 Value
*VecSrc
= CI
.getOperand(2);
1106 Value
*Mask
= CI
.getOperand(3);
1107 Res
= EmitX86Select(Builder
, Mask
, Res
, VecSrc
);
1112 static Value
*upgradeX86Rotate(IRBuilder
<> &Builder
, CallInst
&CI
,
1113 bool IsRotateRight
) {
1114 Type
*Ty
= CI
.getType();
1115 Value
*Src
= CI
.getArgOperand(0);
1116 Value
*Amt
= CI
.getArgOperand(1);
1118 // Amount may be scalar immediate, in which case create a splat vector.
1119 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1120 // we only care about the lowest log2 bits anyway.
1121 if (Amt
->getType() != Ty
) {
1122 unsigned NumElts
= Ty
->getVectorNumElements();
1123 Amt
= Builder
.CreateIntCast(Amt
, Ty
->getScalarType(), false);
1124 Amt
= Builder
.CreateVectorSplat(NumElts
, Amt
);
1127 Intrinsic::ID IID
= IsRotateRight
? Intrinsic::fshr
: Intrinsic::fshl
;
1128 Function
*Intrin
= Intrinsic::getDeclaration(CI
.getModule(), IID
, Ty
);
1129 Value
*Res
= Builder
.CreateCall(Intrin
, {Src
, Src
, Amt
});
1131 if (CI
.getNumArgOperands() == 4) { // For masked intrinsics.
1132 Value
*VecSrc
= CI
.getOperand(2);
1133 Value
*Mask
= CI
.getOperand(3);
1134 Res
= EmitX86Select(Builder
, Mask
, Res
, VecSrc
);
1139 static Value
*upgradeX86vpcom(IRBuilder
<> &Builder
, CallInst
&CI
, unsigned Imm
,
1141 Type
*Ty
= CI
.getType();
1142 Value
*LHS
= CI
.getArgOperand(0);
1143 Value
*RHS
= CI
.getArgOperand(1);
1145 CmpInst::Predicate Pred
;
1148 Pred
= IsSigned
? ICmpInst::ICMP_SLT
: ICmpInst::ICMP_ULT
;
1151 Pred
= IsSigned
? ICmpInst::ICMP_SLE
: ICmpInst::ICMP_ULE
;
1154 Pred
= IsSigned
? ICmpInst::ICMP_SGT
: ICmpInst::ICMP_UGT
;
1157 Pred
= IsSigned
? ICmpInst::ICMP_SGE
: ICmpInst::ICMP_UGE
;
1160 Pred
= ICmpInst::ICMP_EQ
;
1163 Pred
= ICmpInst::ICMP_NE
;
1166 return Constant::getNullValue(Ty
); // FALSE
1168 return Constant::getAllOnesValue(Ty
); // TRUE
1170 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1173 Value
*Cmp
= Builder
.CreateICmp(Pred
, LHS
, RHS
);
1174 Value
*Ext
= Builder
.CreateSExt(Cmp
, Ty
);
1178 static Value
*upgradeX86ConcatShift(IRBuilder
<> &Builder
, CallInst
&CI
,
1179 bool IsShiftRight
, bool ZeroMask
) {
1180 Type
*Ty
= CI
.getType();
1181 Value
*Op0
= CI
.getArgOperand(0);
1182 Value
*Op1
= CI
.getArgOperand(1);
1183 Value
*Amt
= CI
.getArgOperand(2);
1186 std::swap(Op0
, Op1
);
1188 // Amount may be scalar immediate, in which case create a splat vector.
1189 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1190 // we only care about the lowest log2 bits anyway.
1191 if (Amt
->getType() != Ty
) {
1192 unsigned NumElts
= Ty
->getVectorNumElements();
1193 Amt
= Builder
.CreateIntCast(Amt
, Ty
->getScalarType(), false);
1194 Amt
= Builder
.CreateVectorSplat(NumElts
, Amt
);
1197 Intrinsic::ID IID
= IsShiftRight
? Intrinsic::fshr
: Intrinsic::fshl
;
1198 Function
*Intrin
= Intrinsic::getDeclaration(CI
.getModule(), IID
, Ty
);
1199 Value
*Res
= Builder
.CreateCall(Intrin
, {Op0
, Op1
, Amt
});
1201 unsigned NumArgs
= CI
.getNumArgOperands();
1202 if (NumArgs
>= 4) { // For masked intrinsics.
1203 Value
*VecSrc
= NumArgs
== 5 ? CI
.getArgOperand(3) :
1204 ZeroMask
? ConstantAggregateZero::get(CI
.getType()) :
1205 CI
.getArgOperand(0);
1206 Value
*Mask
= CI
.getOperand(NumArgs
- 1);
1207 Res
= EmitX86Select(Builder
, Mask
, Res
, VecSrc
);
1212 static Value
*UpgradeMaskedStore(IRBuilder
<> &Builder
,
1213 Value
*Ptr
, Value
*Data
, Value
*Mask
,
1215 // Cast the pointer to the right type.
1216 Ptr
= Builder
.CreateBitCast(Ptr
,
1217 llvm::PointerType::getUnqual(Data
->getType()));
1219 Aligned
? cast
<VectorType
>(Data
->getType())->getBitWidth() / 8 : 1;
1221 // If the mask is all ones just emit a regular store.
1222 if (const auto *C
= dyn_cast
<Constant
>(Mask
))
1223 if (C
->isAllOnesValue())
1224 return Builder
.CreateAlignedStore(Data
, Ptr
, Align
);
1226 // Convert the mask from an integer type to a vector of i1.
1227 unsigned NumElts
= Data
->getType()->getVectorNumElements();
1228 Mask
= getX86MaskVec(Builder
, Mask
, NumElts
);
1229 return Builder
.CreateMaskedStore(Data
, Ptr
, Align
, Mask
);
1232 static Value
*UpgradeMaskedLoad(IRBuilder
<> &Builder
,
1233 Value
*Ptr
, Value
*Passthru
, Value
*Mask
,
1235 Type
*ValTy
= Passthru
->getType();
1236 // Cast the pointer to the right type.
1237 Ptr
= Builder
.CreateBitCast(Ptr
, llvm::PointerType::getUnqual(ValTy
));
1239 Aligned
? cast
<VectorType
>(Passthru
->getType())->getBitWidth() / 8 : 1;
1241 // If the mask is all ones just emit a regular store.
1242 if (const auto *C
= dyn_cast
<Constant
>(Mask
))
1243 if (C
->isAllOnesValue())
1244 return Builder
.CreateAlignedLoad(ValTy
, Ptr
, Align
);
1246 // Convert the mask from an integer type to a vector of i1.
1247 unsigned NumElts
= Passthru
->getType()->getVectorNumElements();
1248 Mask
= getX86MaskVec(Builder
, Mask
, NumElts
);
1249 return Builder
.CreateMaskedLoad(Ptr
, Align
, Mask
, Passthru
);
1252 static Value
*upgradeAbs(IRBuilder
<> &Builder
, CallInst
&CI
) {
1253 Value
*Op0
= CI
.getArgOperand(0);
1254 llvm::Type
*Ty
= Op0
->getType();
1255 Value
*Zero
= llvm::Constant::getNullValue(Ty
);
1256 Value
*Cmp
= Builder
.CreateICmp(ICmpInst::ICMP_SGT
, Op0
, Zero
);
1257 Value
*Neg
= Builder
.CreateNeg(Op0
);
1258 Value
*Res
= Builder
.CreateSelect(Cmp
, Op0
, Neg
);
1260 if (CI
.getNumArgOperands() == 3)
1261 Res
= EmitX86Select(Builder
,CI
.getArgOperand(2), Res
, CI
.getArgOperand(1));
1266 static Value
*upgradeIntMinMax(IRBuilder
<> &Builder
, CallInst
&CI
,
1267 ICmpInst::Predicate Pred
) {
1268 Value
*Op0
= CI
.getArgOperand(0);
1269 Value
*Op1
= CI
.getArgOperand(1);
1270 Value
*Cmp
= Builder
.CreateICmp(Pred
, Op0
, Op1
);
1271 Value
*Res
= Builder
.CreateSelect(Cmp
, Op0
, Op1
);
1273 if (CI
.getNumArgOperands() == 4)
1274 Res
= EmitX86Select(Builder
, CI
.getArgOperand(3), Res
, CI
.getArgOperand(2));
1279 static Value
*upgradePMULDQ(IRBuilder
<> &Builder
, CallInst
&CI
, bool IsSigned
) {
1280 Type
*Ty
= CI
.getType();
1282 // Arguments have a vXi32 type so cast to vXi64.
1283 Value
*LHS
= Builder
.CreateBitCast(CI
.getArgOperand(0), Ty
);
1284 Value
*RHS
= Builder
.CreateBitCast(CI
.getArgOperand(1), Ty
);
1287 // Shift left then arithmetic shift right.
1288 Constant
*ShiftAmt
= ConstantInt::get(Ty
, 32);
1289 LHS
= Builder
.CreateShl(LHS
, ShiftAmt
);
1290 LHS
= Builder
.CreateAShr(LHS
, ShiftAmt
);
1291 RHS
= Builder
.CreateShl(RHS
, ShiftAmt
);
1292 RHS
= Builder
.CreateAShr(RHS
, ShiftAmt
);
1294 // Clear the upper bits.
1295 Constant
*Mask
= ConstantInt::get(Ty
, 0xffffffff);
1296 LHS
= Builder
.CreateAnd(LHS
, Mask
);
1297 RHS
= Builder
.CreateAnd(RHS
, Mask
);
1300 Value
*Res
= Builder
.CreateMul(LHS
, RHS
);
1302 if (CI
.getNumArgOperands() == 4)
1303 Res
= EmitX86Select(Builder
, CI
.getArgOperand(3), Res
, CI
.getArgOperand(2));
1308 // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
1309 static Value
*ApplyX86MaskOn1BitsVec(IRBuilder
<> &Builder
, Value
*Vec
,
1311 unsigned NumElts
= Vec
->getType()->getVectorNumElements();
1313 const auto *C
= dyn_cast
<Constant
>(Mask
);
1314 if (!C
|| !C
->isAllOnesValue())
1315 Vec
= Builder
.CreateAnd(Vec
, getX86MaskVec(Builder
, Mask
, NumElts
));
1319 uint32_t Indices
[8];
1320 for (unsigned i
= 0; i
!= NumElts
; ++i
)
1322 for (unsigned i
= NumElts
; i
!= 8; ++i
)
1323 Indices
[i
] = NumElts
+ i
% NumElts
;
1324 Vec
= Builder
.CreateShuffleVector(Vec
,
1325 Constant::getNullValue(Vec
->getType()),
1328 return Builder
.CreateBitCast(Vec
, Builder
.getIntNTy(std::max(NumElts
, 8U)));
1331 static Value
*upgradeMaskedCompare(IRBuilder
<> &Builder
, CallInst
&CI
,
1332 unsigned CC
, bool Signed
) {
1333 Value
*Op0
= CI
.getArgOperand(0);
1334 unsigned NumElts
= Op0
->getType()->getVectorNumElements();
1338 Cmp
= Constant::getNullValue(llvm::VectorType::get(Builder
.getInt1Ty(), NumElts
));
1339 } else if (CC
== 7) {
1340 Cmp
= Constant::getAllOnesValue(llvm::VectorType::get(Builder
.getInt1Ty(), NumElts
));
1342 ICmpInst::Predicate Pred
;
1344 default: llvm_unreachable("Unknown condition code");
1345 case 0: Pred
= ICmpInst::ICMP_EQ
; break;
1346 case 1: Pred
= Signed
? ICmpInst::ICMP_SLT
: ICmpInst::ICMP_ULT
; break;
1347 case 2: Pred
= Signed
? ICmpInst::ICMP_SLE
: ICmpInst::ICMP_ULE
; break;
1348 case 4: Pred
= ICmpInst::ICMP_NE
; break;
1349 case 5: Pred
= Signed
? ICmpInst::ICMP_SGE
: ICmpInst::ICMP_UGE
; break;
1350 case 6: Pred
= Signed
? ICmpInst::ICMP_SGT
: ICmpInst::ICMP_UGT
; break;
1352 Cmp
= Builder
.CreateICmp(Pred
, Op0
, CI
.getArgOperand(1));
1355 Value
*Mask
= CI
.getArgOperand(CI
.getNumArgOperands() - 1);
1357 return ApplyX86MaskOn1BitsVec(Builder
, Cmp
, Mask
);
1360 // Replace a masked intrinsic with an older unmasked intrinsic.
1361 static Value
*UpgradeX86MaskedShift(IRBuilder
<> &Builder
, CallInst
&CI
,
1362 Intrinsic::ID IID
) {
1363 Function
*Intrin
= Intrinsic::getDeclaration(CI
.getModule(), IID
);
1364 Value
*Rep
= Builder
.CreateCall(Intrin
,
1365 { CI
.getArgOperand(0), CI
.getArgOperand(1) });
1366 return EmitX86Select(Builder
, CI
.getArgOperand(3), Rep
, CI
.getArgOperand(2));
1369 static Value
* upgradeMaskedMove(IRBuilder
<> &Builder
, CallInst
&CI
) {
1370 Value
* A
= CI
.getArgOperand(0);
1371 Value
* B
= CI
.getArgOperand(1);
1372 Value
* Src
= CI
.getArgOperand(2);
1373 Value
* Mask
= CI
.getArgOperand(3);
1375 Value
* AndNode
= Builder
.CreateAnd(Mask
, APInt(8, 1));
1376 Value
* Cmp
= Builder
.CreateIsNotNull(AndNode
);
1377 Value
* Extract1
= Builder
.CreateExtractElement(B
, (uint64_t)0);
1378 Value
* Extract2
= Builder
.CreateExtractElement(Src
, (uint64_t)0);
1379 Value
* Select
= Builder
.CreateSelect(Cmp
, Extract1
, Extract2
);
1380 return Builder
.CreateInsertElement(A
, Select
, (uint64_t)0);
1384 static Value
* UpgradeMaskToInt(IRBuilder
<> &Builder
, CallInst
&CI
) {
1385 Value
* Op
= CI
.getArgOperand(0);
1386 Type
* ReturnOp
= CI
.getType();
1387 unsigned NumElts
= CI
.getType()->getVectorNumElements();
1388 Value
*Mask
= getX86MaskVec(Builder
, Op
, NumElts
);
1389 return Builder
.CreateSExt(Mask
, ReturnOp
, "vpmovm2");
1392 // Replace intrinsic with unmasked version and a select.
1393 static bool upgradeAVX512MaskToSelect(StringRef Name
, IRBuilder
<> &Builder
,
1394 CallInst
&CI
, Value
*&Rep
) {
1395 Name
= Name
.substr(12); // Remove avx512.mask.
1397 unsigned VecWidth
= CI
.getType()->getPrimitiveSizeInBits();
1398 unsigned EltWidth
= CI
.getType()->getScalarSizeInBits();
1400 if (Name
.startswith("max.p")) {
1401 if (VecWidth
== 128 && EltWidth
== 32)
1402 IID
= Intrinsic::x86_sse_max_ps
;
1403 else if (VecWidth
== 128 && EltWidth
== 64)
1404 IID
= Intrinsic::x86_sse2_max_pd
;
1405 else if (VecWidth
== 256 && EltWidth
== 32)
1406 IID
= Intrinsic::x86_avx_max_ps_256
;
1407 else if (VecWidth
== 256 && EltWidth
== 64)
1408 IID
= Intrinsic::x86_avx_max_pd_256
;
1410 llvm_unreachable("Unexpected intrinsic");
1411 } else if (Name
.startswith("min.p")) {
1412 if (VecWidth
== 128 && EltWidth
== 32)
1413 IID
= Intrinsic::x86_sse_min_ps
;
1414 else if (VecWidth
== 128 && EltWidth
== 64)
1415 IID
= Intrinsic::x86_sse2_min_pd
;
1416 else if (VecWidth
== 256 && EltWidth
== 32)
1417 IID
= Intrinsic::x86_avx_min_ps_256
;
1418 else if (VecWidth
== 256 && EltWidth
== 64)
1419 IID
= Intrinsic::x86_avx_min_pd_256
;
1421 llvm_unreachable("Unexpected intrinsic");
1422 } else if (Name
.startswith("pshuf.b.")) {
1423 if (VecWidth
== 128)
1424 IID
= Intrinsic::x86_ssse3_pshuf_b_128
;
1425 else if (VecWidth
== 256)
1426 IID
= Intrinsic::x86_avx2_pshuf_b
;
1427 else if (VecWidth
== 512)
1428 IID
= Intrinsic::x86_avx512_pshuf_b_512
;
1430 llvm_unreachable("Unexpected intrinsic");
1431 } else if (Name
.startswith("pmul.hr.sw.")) {
1432 if (VecWidth
== 128)
1433 IID
= Intrinsic::x86_ssse3_pmul_hr_sw_128
;
1434 else if (VecWidth
== 256)
1435 IID
= Intrinsic::x86_avx2_pmul_hr_sw
;
1436 else if (VecWidth
== 512)
1437 IID
= Intrinsic::x86_avx512_pmul_hr_sw_512
;
1439 llvm_unreachable("Unexpected intrinsic");
1440 } else if (Name
.startswith("pmulh.w.")) {
1441 if (VecWidth
== 128)
1442 IID
= Intrinsic::x86_sse2_pmulh_w
;
1443 else if (VecWidth
== 256)
1444 IID
= Intrinsic::x86_avx2_pmulh_w
;
1445 else if (VecWidth
== 512)
1446 IID
= Intrinsic::x86_avx512_pmulh_w_512
;
1448 llvm_unreachable("Unexpected intrinsic");
1449 } else if (Name
.startswith("pmulhu.w.")) {
1450 if (VecWidth
== 128)
1451 IID
= Intrinsic::x86_sse2_pmulhu_w
;
1452 else if (VecWidth
== 256)
1453 IID
= Intrinsic::x86_avx2_pmulhu_w
;
1454 else if (VecWidth
== 512)
1455 IID
= Intrinsic::x86_avx512_pmulhu_w_512
;
1457 llvm_unreachable("Unexpected intrinsic");
1458 } else if (Name
.startswith("pmaddw.d.")) {
1459 if (VecWidth
== 128)
1460 IID
= Intrinsic::x86_sse2_pmadd_wd
;
1461 else if (VecWidth
== 256)
1462 IID
= Intrinsic::x86_avx2_pmadd_wd
;
1463 else if (VecWidth
== 512)
1464 IID
= Intrinsic::x86_avx512_pmaddw_d_512
;
1466 llvm_unreachable("Unexpected intrinsic");
1467 } else if (Name
.startswith("pmaddubs.w.")) {
1468 if (VecWidth
== 128)
1469 IID
= Intrinsic::x86_ssse3_pmadd_ub_sw_128
;
1470 else if (VecWidth
== 256)
1471 IID
= Intrinsic::x86_avx2_pmadd_ub_sw
;
1472 else if (VecWidth
== 512)
1473 IID
= Intrinsic::x86_avx512_pmaddubs_w_512
;
1475 llvm_unreachable("Unexpected intrinsic");
1476 } else if (Name
.startswith("packsswb.")) {
1477 if (VecWidth
== 128)
1478 IID
= Intrinsic::x86_sse2_packsswb_128
;
1479 else if (VecWidth
== 256)
1480 IID
= Intrinsic::x86_avx2_packsswb
;
1481 else if (VecWidth
== 512)
1482 IID
= Intrinsic::x86_avx512_packsswb_512
;
1484 llvm_unreachable("Unexpected intrinsic");
1485 } else if (Name
.startswith("packssdw.")) {
1486 if (VecWidth
== 128)
1487 IID
= Intrinsic::x86_sse2_packssdw_128
;
1488 else if (VecWidth
== 256)
1489 IID
= Intrinsic::x86_avx2_packssdw
;
1490 else if (VecWidth
== 512)
1491 IID
= Intrinsic::x86_avx512_packssdw_512
;
1493 llvm_unreachable("Unexpected intrinsic");
1494 } else if (Name
.startswith("packuswb.")) {
1495 if (VecWidth
== 128)
1496 IID
= Intrinsic::x86_sse2_packuswb_128
;
1497 else if (VecWidth
== 256)
1498 IID
= Intrinsic::x86_avx2_packuswb
;
1499 else if (VecWidth
== 512)
1500 IID
= Intrinsic::x86_avx512_packuswb_512
;
1502 llvm_unreachable("Unexpected intrinsic");
1503 } else if (Name
.startswith("packusdw.")) {
1504 if (VecWidth
== 128)
1505 IID
= Intrinsic::x86_sse41_packusdw
;
1506 else if (VecWidth
== 256)
1507 IID
= Intrinsic::x86_avx2_packusdw
;
1508 else if (VecWidth
== 512)
1509 IID
= Intrinsic::x86_avx512_packusdw_512
;
1511 llvm_unreachable("Unexpected intrinsic");
1512 } else if (Name
.startswith("vpermilvar.")) {
1513 if (VecWidth
== 128 && EltWidth
== 32)
1514 IID
= Intrinsic::x86_avx_vpermilvar_ps
;
1515 else if (VecWidth
== 128 && EltWidth
== 64)
1516 IID
= Intrinsic::x86_avx_vpermilvar_pd
;
1517 else if (VecWidth
== 256 && EltWidth
== 32)
1518 IID
= Intrinsic::x86_avx_vpermilvar_ps_256
;
1519 else if (VecWidth
== 256 && EltWidth
== 64)
1520 IID
= Intrinsic::x86_avx_vpermilvar_pd_256
;
1521 else if (VecWidth
== 512 && EltWidth
== 32)
1522 IID
= Intrinsic::x86_avx512_vpermilvar_ps_512
;
1523 else if (VecWidth
== 512 && EltWidth
== 64)
1524 IID
= Intrinsic::x86_avx512_vpermilvar_pd_512
;
1526 llvm_unreachable("Unexpected intrinsic");
1527 } else if (Name
== "cvtpd2dq.256") {
1528 IID
= Intrinsic::x86_avx_cvt_pd2dq_256
;
1529 } else if (Name
== "cvtpd2ps.256") {
1530 IID
= Intrinsic::x86_avx_cvt_pd2_ps_256
;
1531 } else if (Name
== "cvttpd2dq.256") {
1532 IID
= Intrinsic::x86_avx_cvtt_pd2dq_256
;
1533 } else if (Name
== "cvttps2dq.128") {
1534 IID
= Intrinsic::x86_sse2_cvttps2dq
;
1535 } else if (Name
== "cvttps2dq.256") {
1536 IID
= Intrinsic::x86_avx_cvtt_ps2dq_256
;
1537 } else if (Name
.startswith("permvar.")) {
1538 bool IsFloat
= CI
.getType()->isFPOrFPVectorTy();
1539 if (VecWidth
== 256 && EltWidth
== 32 && IsFloat
)
1540 IID
= Intrinsic::x86_avx2_permps
;
1541 else if (VecWidth
== 256 && EltWidth
== 32 && !IsFloat
)
1542 IID
= Intrinsic::x86_avx2_permd
;
1543 else if (VecWidth
== 256 && EltWidth
== 64 && IsFloat
)
1544 IID
= Intrinsic::x86_avx512_permvar_df_256
;
1545 else if (VecWidth
== 256 && EltWidth
== 64 && !IsFloat
)
1546 IID
= Intrinsic::x86_avx512_permvar_di_256
;
1547 else if (VecWidth
== 512 && EltWidth
== 32 && IsFloat
)
1548 IID
= Intrinsic::x86_avx512_permvar_sf_512
;
1549 else if (VecWidth
== 512 && EltWidth
== 32 && !IsFloat
)
1550 IID
= Intrinsic::x86_avx512_permvar_si_512
;
1551 else if (VecWidth
== 512 && EltWidth
== 64 && IsFloat
)
1552 IID
= Intrinsic::x86_avx512_permvar_df_512
;
1553 else if (VecWidth
== 512 && EltWidth
== 64 && !IsFloat
)
1554 IID
= Intrinsic::x86_avx512_permvar_di_512
;
1555 else if (VecWidth
== 128 && EltWidth
== 16)
1556 IID
= Intrinsic::x86_avx512_permvar_hi_128
;
1557 else if (VecWidth
== 256 && EltWidth
== 16)
1558 IID
= Intrinsic::x86_avx512_permvar_hi_256
;
1559 else if (VecWidth
== 512 && EltWidth
== 16)
1560 IID
= Intrinsic::x86_avx512_permvar_hi_512
;
1561 else if (VecWidth
== 128 && EltWidth
== 8)
1562 IID
= Intrinsic::x86_avx512_permvar_qi_128
;
1563 else if (VecWidth
== 256 && EltWidth
== 8)
1564 IID
= Intrinsic::x86_avx512_permvar_qi_256
;
1565 else if (VecWidth
== 512 && EltWidth
== 8)
1566 IID
= Intrinsic::x86_avx512_permvar_qi_512
;
1568 llvm_unreachable("Unexpected intrinsic");
1569 } else if (Name
.startswith("dbpsadbw.")) {
1570 if (VecWidth
== 128)
1571 IID
= Intrinsic::x86_avx512_dbpsadbw_128
;
1572 else if (VecWidth
== 256)
1573 IID
= Intrinsic::x86_avx512_dbpsadbw_256
;
1574 else if (VecWidth
== 512)
1575 IID
= Intrinsic::x86_avx512_dbpsadbw_512
;
1577 llvm_unreachable("Unexpected intrinsic");
1578 } else if (Name
.startswith("pmultishift.qb.")) {
1579 if (VecWidth
== 128)
1580 IID
= Intrinsic::x86_avx512_pmultishift_qb_128
;
1581 else if (VecWidth
== 256)
1582 IID
= Intrinsic::x86_avx512_pmultishift_qb_256
;
1583 else if (VecWidth
== 512)
1584 IID
= Intrinsic::x86_avx512_pmultishift_qb_512
;
1586 llvm_unreachable("Unexpected intrinsic");
1587 } else if (Name
.startswith("conflict.")) {
1588 if (Name
[9] == 'd' && VecWidth
== 128)
1589 IID
= Intrinsic::x86_avx512_conflict_d_128
;
1590 else if (Name
[9] == 'd' && VecWidth
== 256)
1591 IID
= Intrinsic::x86_avx512_conflict_d_256
;
1592 else if (Name
[9] == 'd' && VecWidth
== 512)
1593 IID
= Intrinsic::x86_avx512_conflict_d_512
;
1594 else if (Name
[9] == 'q' && VecWidth
== 128)
1595 IID
= Intrinsic::x86_avx512_conflict_q_128
;
1596 else if (Name
[9] == 'q' && VecWidth
== 256)
1597 IID
= Intrinsic::x86_avx512_conflict_q_256
;
1598 else if (Name
[9] == 'q' && VecWidth
== 512)
1599 IID
= Intrinsic::x86_avx512_conflict_q_512
;
1601 llvm_unreachable("Unexpected intrinsic");
1602 } else if (Name
.startswith("pavg.")) {
1603 if (Name
[5] == 'b' && VecWidth
== 128)
1604 IID
= Intrinsic::x86_sse2_pavg_b
;
1605 else if (Name
[5] == 'b' && VecWidth
== 256)
1606 IID
= Intrinsic::x86_avx2_pavg_b
;
1607 else if (Name
[5] == 'b' && VecWidth
== 512)
1608 IID
= Intrinsic::x86_avx512_pavg_b_512
;
1609 else if (Name
[5] == 'w' && VecWidth
== 128)
1610 IID
= Intrinsic::x86_sse2_pavg_w
;
1611 else if (Name
[5] == 'w' && VecWidth
== 256)
1612 IID
= Intrinsic::x86_avx2_pavg_w
;
1613 else if (Name
[5] == 'w' && VecWidth
== 512)
1614 IID
= Intrinsic::x86_avx512_pavg_w_512
;
1616 llvm_unreachable("Unexpected intrinsic");
1620 SmallVector
<Value
*, 4> Args(CI
.arg_operands().begin(),
1621 CI
.arg_operands().end());
1624 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
.getModule(), IID
),
1626 unsigned NumArgs
= CI
.getNumArgOperands();
1627 Rep
= EmitX86Select(Builder
, CI
.getArgOperand(NumArgs
- 1), Rep
,
1628 CI
.getArgOperand(NumArgs
- 2));
/// Upgrade the comment in a call to inline asm that represents an ObjC
/// retain/release marker.
///
/// The string is rewritten only when all three of the following hold:
///   * it begins with "mov\tfp",
///   * it contains "objc_retainAutoreleaseReturnValue" somewhere, and
///   * it contains "# marker" somewhere.
/// In that case the single '#' character that opens the first "# marker"
/// occurrence is replaced by ';'. Every other string is left untouched.
///
/// \param AsmStr the inline asm string, modified in place. It is dereferenced
///               unconditionally, so it must not be null.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  // Anchor the prefix test at offset 0. Searching the whole string with find()
  // and then comparing the hit position against 0 gives the same answer but
  // walks the entire string whenever the prefix is absent.
  static const char Prefix[] = "mov\tfp";
  if (AsmStr->compare(0, sizeof(Prefix) - 1, Prefix) != 0)
    return;

  if (AsmStr->find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;

  const std::string::size_type Pos = AsmStr->find("# marker");
  if (Pos == std::string::npos)
    return;

  // Only the leading '#' is swapped; the rest of the comment text is kept.
  AsmStr->replace(Pos, 1, ";");
}
1644 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1645 /// provided to seamlessly integrate with existing context.
1646 void llvm::UpgradeIntrinsicCall(CallInst
*CI
, Function
*NewFn
) {
1647 Function
*F
= CI
->getCalledFunction();
1648 LLVMContext
&C
= CI
->getContext();
1649 IRBuilder
<> Builder(C
);
1650 Builder
.SetInsertPoint(CI
->getParent(), CI
->getIterator());
1652 assert(F
&& "Intrinsic call is not direct?");
1655 // Get the Function's name.
1656 StringRef Name
= F
->getName();
1658 assert(Name
.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1659 Name
= Name
.substr(5);
1661 bool IsX86
= Name
.startswith("x86.");
1663 Name
= Name
.substr(4);
1664 bool IsNVVM
= Name
.startswith("nvvm.");
1666 Name
= Name
.substr(5);
1668 if (IsX86
&& Name
.startswith("sse4a.movnt.")) {
1669 Module
*M
= F
->getParent();
1670 SmallVector
<Metadata
*, 1> Elts
;
1672 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C
), 1)));
1673 MDNode
*Node
= MDNode::get(C
, Elts
);
1675 Value
*Arg0
= CI
->getArgOperand(0);
1676 Value
*Arg1
= CI
->getArgOperand(1);
1678 // Nontemporal (unaligned) store of the 0'th element of the float/double
1680 Type
*SrcEltTy
= cast
<VectorType
>(Arg1
->getType())->getElementType();
1681 PointerType
*EltPtrTy
= PointerType::getUnqual(SrcEltTy
);
1682 Value
*Addr
= Builder
.CreateBitCast(Arg0
, EltPtrTy
, "cast");
1684 Builder
.CreateExtractElement(Arg1
, (uint64_t)0, "extractelement");
1686 StoreInst
*SI
= Builder
.CreateAlignedStore(Extract
, Addr
, 1);
1687 SI
->setMetadata(M
->getMDKindID("nontemporal"), Node
);
1689 // Remove intrinsic.
1690 CI
->eraseFromParent();
1694 if (IsX86
&& (Name
.startswith("avx.movnt.") ||
1695 Name
.startswith("avx512.storent."))) {
1696 Module
*M
= F
->getParent();
1697 SmallVector
<Metadata
*, 1> Elts
;
1699 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C
), 1)));
1700 MDNode
*Node
= MDNode::get(C
, Elts
);
1702 Value
*Arg0
= CI
->getArgOperand(0);
1703 Value
*Arg1
= CI
->getArgOperand(1);
1705 // Convert the type of the pointer to a pointer to the stored type.
1706 Value
*BC
= Builder
.CreateBitCast(Arg0
,
1707 PointerType::getUnqual(Arg1
->getType()),
1709 VectorType
*VTy
= cast
<VectorType
>(Arg1
->getType());
1710 StoreInst
*SI
= Builder
.CreateAlignedStore(Arg1
, BC
,
1711 VTy
->getBitWidth() / 8);
1712 SI
->setMetadata(M
->getMDKindID("nontemporal"), Node
);
1714 // Remove intrinsic.
1715 CI
->eraseFromParent();
1719 if (IsX86
&& Name
== "sse2.storel.dq") {
1720 Value
*Arg0
= CI
->getArgOperand(0);
1721 Value
*Arg1
= CI
->getArgOperand(1);
1723 Type
*NewVecTy
= VectorType::get(Type::getInt64Ty(C
), 2);
1724 Value
*BC0
= Builder
.CreateBitCast(Arg1
, NewVecTy
, "cast");
1725 Value
*Elt
= Builder
.CreateExtractElement(BC0
, (uint64_t)0);
1726 Value
*BC
= Builder
.CreateBitCast(Arg0
,
1727 PointerType::getUnqual(Elt
->getType()),
1729 Builder
.CreateAlignedStore(Elt
, BC
, 1);
1731 // Remove intrinsic.
1732 CI
->eraseFromParent();
1736 if (IsX86
&& (Name
.startswith("sse.storeu.") ||
1737 Name
.startswith("sse2.storeu.") ||
1738 Name
.startswith("avx.storeu."))) {
1739 Value
*Arg0
= CI
->getArgOperand(0);
1740 Value
*Arg1
= CI
->getArgOperand(1);
1742 Arg0
= Builder
.CreateBitCast(Arg0
,
1743 PointerType::getUnqual(Arg1
->getType()),
1745 Builder
.CreateAlignedStore(Arg1
, Arg0
, 1);
1747 // Remove intrinsic.
1748 CI
->eraseFromParent();
1752 if (IsX86
&& Name
== "avx512.mask.store.ss") {
1753 Value
*Mask
= Builder
.CreateAnd(CI
->getArgOperand(2), Builder
.getInt8(1));
1754 UpgradeMaskedStore(Builder
, CI
->getArgOperand(0), CI
->getArgOperand(1),
1757 // Remove intrinsic.
1758 CI
->eraseFromParent();
1762 if (IsX86
&& (Name
.startswith("avx512.mask.store"))) {
1763 // "avx512.mask.storeu." or "avx512.mask.store."
1764 bool Aligned
= Name
[17] != 'u'; // "avx512.mask.storeu".
1765 UpgradeMaskedStore(Builder
, CI
->getArgOperand(0), CI
->getArgOperand(1),
1766 CI
->getArgOperand(2), Aligned
);
1768 // Remove intrinsic.
1769 CI
->eraseFromParent();
1774 // Upgrade packed integer vector compare intrinsics to compare instructions.
1775 if (IsX86
&& (Name
.startswith("sse2.pcmp") ||
1776 Name
.startswith("avx2.pcmp"))) {
1777 // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
1778 bool CmpEq
= Name
[9] == 'e';
1779 Rep
= Builder
.CreateICmp(CmpEq
? ICmpInst::ICMP_EQ
: ICmpInst::ICMP_SGT
,
1780 CI
->getArgOperand(0), CI
->getArgOperand(1));
1781 Rep
= Builder
.CreateSExt(Rep
, CI
->getType(), "");
1782 } else if (IsX86
&& (Name
.startswith("avx512.broadcastm"))) {
1783 Type
*ExtTy
= Type::getInt32Ty(C
);
1784 if (CI
->getOperand(0)->getType()->isIntegerTy(8))
1785 ExtTy
= Type::getInt64Ty(C
);
1786 unsigned NumElts
= CI
->getType()->getPrimitiveSizeInBits() /
1787 ExtTy
->getPrimitiveSizeInBits();
1788 Rep
= Builder
.CreateZExt(CI
->getArgOperand(0), ExtTy
);
1789 Rep
= Builder
.CreateVectorSplat(NumElts
, Rep
);
1790 } else if (IsX86
&& (Name
== "sse.sqrt.ss" ||
1791 Name
== "sse2.sqrt.sd")) {
1792 Value
*Vec
= CI
->getArgOperand(0);
1793 Value
*Elt0
= Builder
.CreateExtractElement(Vec
, (uint64_t)0);
1794 Function
*Intr
= Intrinsic::getDeclaration(F
->getParent(),
1795 Intrinsic::sqrt
, Elt0
->getType());
1796 Elt0
= Builder
.CreateCall(Intr
, Elt0
);
1797 Rep
= Builder
.CreateInsertElement(Vec
, Elt0
, (uint64_t)0);
1798 } else if (IsX86
&& (Name
.startswith("avx.sqrt.p") ||
1799 Name
.startswith("sse2.sqrt.p") ||
1800 Name
.startswith("sse.sqrt.p"))) {
1801 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(),
1804 {CI
->getArgOperand(0)});
1805 } else if (IsX86
&& (Name
.startswith("avx512.mask.sqrt.p"))) {
1806 if (CI
->getNumArgOperands() == 4 &&
1807 (!isa
<ConstantInt
>(CI
->getArgOperand(3)) ||
1808 cast
<ConstantInt
>(CI
->getArgOperand(3))->getZExtValue() != 4)) {
1809 Intrinsic::ID IID
= Name
[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
1810 : Intrinsic::x86_avx512_sqrt_pd_512
;
1812 Value
*Args
[] = { CI
->getArgOperand(0), CI
->getArgOperand(3) };
1813 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(),
1816 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(),
1819 {CI
->getArgOperand(0)});
1821 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(2), Rep
,
1822 CI
->getArgOperand(1));
1823 } else if (IsX86
&& (Name
.startswith("avx512.ptestm") ||
1824 Name
.startswith("avx512.ptestnm"))) {
1825 Value
*Op0
= CI
->getArgOperand(0);
1826 Value
*Op1
= CI
->getArgOperand(1);
1827 Value
*Mask
= CI
->getArgOperand(2);
1828 Rep
= Builder
.CreateAnd(Op0
, Op1
);
1829 llvm::Type
*Ty
= Op0
->getType();
1830 Value
*Zero
= llvm::Constant::getNullValue(Ty
);
1831 ICmpInst::Predicate Pred
=
1832 Name
.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE
: ICmpInst::ICMP_EQ
;
1833 Rep
= Builder
.CreateICmp(Pred
, Rep
, Zero
);
1834 Rep
= ApplyX86MaskOn1BitsVec(Builder
, Rep
, Mask
);
1835 } else if (IsX86
&& (Name
.startswith("avx512.mask.pbroadcast"))){
1837 CI
->getArgOperand(1)->getType()->getVectorNumElements();
1838 Rep
= Builder
.CreateVectorSplat(NumElts
, CI
->getArgOperand(0));
1839 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(2), Rep
,
1840 CI
->getArgOperand(1));
1841 } else if (IsX86
&& (Name
.startswith("avx512.kunpck"))) {
1842 unsigned NumElts
= CI
->getType()->getScalarSizeInBits();
1843 Value
*LHS
= getX86MaskVec(Builder
, CI
->getArgOperand(0), NumElts
);
1844 Value
*RHS
= getX86MaskVec(Builder
, CI
->getArgOperand(1), NumElts
);
1845 uint32_t Indices
[64];
1846 for (unsigned i
= 0; i
!= NumElts
; ++i
)
1849 // First extract half of each vector. This gives better codegen than
1850 // doing it in a single shuffle.
1851 LHS
= Builder
.CreateShuffleVector(LHS
, LHS
,
1852 makeArrayRef(Indices
, NumElts
/ 2));
1853 RHS
= Builder
.CreateShuffleVector(RHS
, RHS
,
1854 makeArrayRef(Indices
, NumElts
/ 2));
1855 // Concat the vectors.
1856 // NOTE: Operands have to be swapped to match intrinsic definition.
1857 Rep
= Builder
.CreateShuffleVector(RHS
, LHS
,
1858 makeArrayRef(Indices
, NumElts
));
1859 Rep
= Builder
.CreateBitCast(Rep
, CI
->getType());
1860 } else if (IsX86
&& Name
== "avx512.kand.w") {
1861 Value
*LHS
= getX86MaskVec(Builder
, CI
->getArgOperand(0), 16);
1862 Value
*RHS
= getX86MaskVec(Builder
, CI
->getArgOperand(1), 16);
1863 Rep
= Builder
.CreateAnd(LHS
, RHS
);
1864 Rep
= Builder
.CreateBitCast(Rep
, CI
->getType());
1865 } else if (IsX86
&& Name
== "avx512.kandn.w") {
1866 Value
*LHS
= getX86MaskVec(Builder
, CI
->getArgOperand(0), 16);
1867 Value
*RHS
= getX86MaskVec(Builder
, CI
->getArgOperand(1), 16);
1868 LHS
= Builder
.CreateNot(LHS
);
1869 Rep
= Builder
.CreateAnd(LHS
, RHS
);
1870 Rep
= Builder
.CreateBitCast(Rep
, CI
->getType());
1871 } else if (IsX86
&& Name
== "avx512.kor.w") {
1872 Value
*LHS
= getX86MaskVec(Builder
, CI
->getArgOperand(0), 16);
1873 Value
*RHS
= getX86MaskVec(Builder
, CI
->getArgOperand(1), 16);
1874 Rep
= Builder
.CreateOr(LHS
, RHS
);
1875 Rep
= Builder
.CreateBitCast(Rep
, CI
->getType());
1876 } else if (IsX86
&& Name
== "avx512.kxor.w") {
1877 Value
*LHS
= getX86MaskVec(Builder
, CI
->getArgOperand(0), 16);
1878 Value
*RHS
= getX86MaskVec(Builder
, CI
->getArgOperand(1), 16);
1879 Rep
= Builder
.CreateXor(LHS
, RHS
);
1880 Rep
= Builder
.CreateBitCast(Rep
, CI
->getType());
1881 } else if (IsX86
&& Name
== "avx512.kxnor.w") {
1882 Value
*LHS
= getX86MaskVec(Builder
, CI
->getArgOperand(0), 16);
1883 Value
*RHS
= getX86MaskVec(Builder
, CI
->getArgOperand(1), 16);
1884 LHS
= Builder
.CreateNot(LHS
);
1885 Rep
= Builder
.CreateXor(LHS
, RHS
);
1886 Rep
= Builder
.CreateBitCast(Rep
, CI
->getType());
1887 } else if (IsX86
&& Name
== "avx512.knot.w") {
1888 Rep
= getX86MaskVec(Builder
, CI
->getArgOperand(0), 16);
1889 Rep
= Builder
.CreateNot(Rep
);
1890 Rep
= Builder
.CreateBitCast(Rep
, CI
->getType());
1892 (Name
== "avx512.kortestz.w" || Name
== "avx512.kortestc.w")) {
1893 Value
*LHS
= getX86MaskVec(Builder
, CI
->getArgOperand(0), 16);
1894 Value
*RHS
= getX86MaskVec(Builder
, CI
->getArgOperand(1), 16);
1895 Rep
= Builder
.CreateOr(LHS
, RHS
);
1896 Rep
= Builder
.CreateBitCast(Rep
, Builder
.getInt16Ty());
1898 if (Name
[14] == 'c')
1899 C
= ConstantInt::getAllOnesValue(Builder
.getInt16Ty());
1901 C
= ConstantInt::getNullValue(Builder
.getInt16Ty());
1902 Rep
= Builder
.CreateICmpEQ(Rep
, C
);
1903 Rep
= Builder
.CreateZExt(Rep
, Builder
.getInt32Ty());
1904 } else if (IsX86
&& (Name
== "sse.add.ss" || Name
== "sse2.add.sd" ||
1905 Name
== "sse.sub.ss" || Name
== "sse2.sub.sd" ||
1906 Name
== "sse.mul.ss" || Name
== "sse2.mul.sd" ||
1907 Name
== "sse.div.ss" || Name
== "sse2.div.sd")) {
1908 Type
*I32Ty
= Type::getInt32Ty(C
);
1909 Value
*Elt0
= Builder
.CreateExtractElement(CI
->getArgOperand(0),
1910 ConstantInt::get(I32Ty
, 0));
1911 Value
*Elt1
= Builder
.CreateExtractElement(CI
->getArgOperand(1),
1912 ConstantInt::get(I32Ty
, 0));
1914 if (Name
.contains(".add."))
1915 EltOp
= Builder
.CreateFAdd(Elt0
, Elt1
);
1916 else if (Name
.contains(".sub."))
1917 EltOp
= Builder
.CreateFSub(Elt0
, Elt1
);
1918 else if (Name
.contains(".mul."))
1919 EltOp
= Builder
.CreateFMul(Elt0
, Elt1
);
1921 EltOp
= Builder
.CreateFDiv(Elt0
, Elt1
);
1922 Rep
= Builder
.CreateInsertElement(CI
->getArgOperand(0), EltOp
,
1923 ConstantInt::get(I32Ty
, 0));
1924 } else if (IsX86
&& Name
.startswith("avx512.mask.pcmp")) {
1925 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1926 bool CmpEq
= Name
[16] == 'e';
1927 Rep
= upgradeMaskedCompare(Builder
, *CI
, CmpEq
? 0 : 6, true);
1928 } else if (IsX86
&& Name
.startswith("avx512.mask.vpshufbitqmb.")) {
1929 Type
*OpTy
= CI
->getArgOperand(0)->getType();
1930 unsigned VecWidth
= OpTy
->getPrimitiveSizeInBits();
1933 default: llvm_unreachable("Unexpected intrinsic");
1934 case 128: IID
= Intrinsic::x86_avx512_vpshufbitqmb_128
; break;
1935 case 256: IID
= Intrinsic::x86_avx512_vpshufbitqmb_256
; break;
1936 case 512: IID
= Intrinsic::x86_avx512_vpshufbitqmb_512
; break;
1939 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(), IID
),
1940 { CI
->getOperand(0), CI
->getArgOperand(1) });
1941 Rep
= ApplyX86MaskOn1BitsVec(Builder
, Rep
, CI
->getArgOperand(2));
1942 } else if (IsX86
&& Name
.startswith("avx512.mask.fpclass.p")) {
1943 Type
*OpTy
= CI
->getArgOperand(0)->getType();
1944 unsigned VecWidth
= OpTy
->getPrimitiveSizeInBits();
1945 unsigned EltWidth
= OpTy
->getScalarSizeInBits();
1947 if (VecWidth
== 128 && EltWidth
== 32)
1948 IID
= Intrinsic::x86_avx512_fpclass_ps_128
;
1949 else if (VecWidth
== 256 && EltWidth
== 32)
1950 IID
= Intrinsic::x86_avx512_fpclass_ps_256
;
1951 else if (VecWidth
== 512 && EltWidth
== 32)
1952 IID
= Intrinsic::x86_avx512_fpclass_ps_512
;
1953 else if (VecWidth
== 128 && EltWidth
== 64)
1954 IID
= Intrinsic::x86_avx512_fpclass_pd_128
;
1955 else if (VecWidth
== 256 && EltWidth
== 64)
1956 IID
= Intrinsic::x86_avx512_fpclass_pd_256
;
1957 else if (VecWidth
== 512 && EltWidth
== 64)
1958 IID
= Intrinsic::x86_avx512_fpclass_pd_512
;
1960 llvm_unreachable("Unexpected intrinsic");
1962 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(), IID
),
1963 { CI
->getOperand(0), CI
->getArgOperand(1) });
1964 Rep
= ApplyX86MaskOn1BitsVec(Builder
, Rep
, CI
->getArgOperand(2));
1965 } else if (IsX86
&& Name
.startswith("avx512.mask.cmp.p")) {
1966 Type
*OpTy
= CI
->getArgOperand(0)->getType();
1967 unsigned VecWidth
= OpTy
->getPrimitiveSizeInBits();
1968 unsigned EltWidth
= OpTy
->getScalarSizeInBits();
1970 if (VecWidth
== 128 && EltWidth
== 32)
1971 IID
= Intrinsic::x86_avx512_cmp_ps_128
;
1972 else if (VecWidth
== 256 && EltWidth
== 32)
1973 IID
= Intrinsic::x86_avx512_cmp_ps_256
;
1974 else if (VecWidth
== 512 && EltWidth
== 32)
1975 IID
= Intrinsic::x86_avx512_cmp_ps_512
;
1976 else if (VecWidth
== 128 && EltWidth
== 64)
1977 IID
= Intrinsic::x86_avx512_cmp_pd_128
;
1978 else if (VecWidth
== 256 && EltWidth
== 64)
1979 IID
= Intrinsic::x86_avx512_cmp_pd_256
;
1980 else if (VecWidth
== 512 && EltWidth
== 64)
1981 IID
= Intrinsic::x86_avx512_cmp_pd_512
;
1983 llvm_unreachable("Unexpected intrinsic");
1985 SmallVector
<Value
*, 4> Args
;
1986 Args
.push_back(CI
->getArgOperand(0));
1987 Args
.push_back(CI
->getArgOperand(1));
1988 Args
.push_back(CI
->getArgOperand(2));
1989 if (CI
->getNumArgOperands() == 5)
1990 Args
.push_back(CI
->getArgOperand(4));
1992 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(), IID
),
1994 Rep
= ApplyX86MaskOn1BitsVec(Builder
, Rep
, CI
->getArgOperand(3));
1995 } else if (IsX86
&& Name
.startswith("avx512.mask.cmp.") &&
1997 // Integer compare intrinsics.
1998 unsigned Imm
= cast
<ConstantInt
>(CI
->getArgOperand(2))->getZExtValue();
1999 Rep
= upgradeMaskedCompare(Builder
, *CI
, Imm
, true);
2000 } else if (IsX86
&& Name
.startswith("avx512.mask.ucmp.")) {
2001 unsigned Imm
= cast
<ConstantInt
>(CI
->getArgOperand(2))->getZExtValue();
2002 Rep
= upgradeMaskedCompare(Builder
, *CI
, Imm
, false);
2003 } else if (IsX86
&& (Name
.startswith("avx512.cvtb2mask.") ||
2004 Name
.startswith("avx512.cvtw2mask.") ||
2005 Name
.startswith("avx512.cvtd2mask.") ||
2006 Name
.startswith("avx512.cvtq2mask."))) {
2007 Value
*Op
= CI
->getArgOperand(0);
2008 Value
*Zero
= llvm::Constant::getNullValue(Op
->getType());
2009 Rep
= Builder
.CreateICmp(ICmpInst::ICMP_SLT
, Op
, Zero
);
2010 Rep
= ApplyX86MaskOn1BitsVec(Builder
, Rep
, nullptr);
2011 } else if(IsX86
&& (Name
== "ssse3.pabs.b.128" ||
2012 Name
== "ssse3.pabs.w.128" ||
2013 Name
== "ssse3.pabs.d.128" ||
2014 Name
.startswith("avx2.pabs") ||
2015 Name
.startswith("avx512.mask.pabs"))) {
2016 Rep
= upgradeAbs(Builder
, *CI
);
2017 } else if (IsX86
&& (Name
== "sse41.pmaxsb" ||
2018 Name
== "sse2.pmaxs.w" ||
2019 Name
== "sse41.pmaxsd" ||
2020 Name
.startswith("avx2.pmaxs") ||
2021 Name
.startswith("avx512.mask.pmaxs"))) {
2022 Rep
= upgradeIntMinMax(Builder
, *CI
, ICmpInst::ICMP_SGT
);
2023 } else if (IsX86
&& (Name
== "sse2.pmaxu.b" ||
2024 Name
== "sse41.pmaxuw" ||
2025 Name
== "sse41.pmaxud" ||
2026 Name
.startswith("avx2.pmaxu") ||
2027 Name
.startswith("avx512.mask.pmaxu"))) {
2028 Rep
= upgradeIntMinMax(Builder
, *CI
, ICmpInst::ICMP_UGT
);
2029 } else if (IsX86
&& (Name
== "sse41.pminsb" ||
2030 Name
== "sse2.pmins.w" ||
2031 Name
== "sse41.pminsd" ||
2032 Name
.startswith("avx2.pmins") ||
2033 Name
.startswith("avx512.mask.pmins"))) {
2034 Rep
= upgradeIntMinMax(Builder
, *CI
, ICmpInst::ICMP_SLT
);
2035 } else if (IsX86
&& (Name
== "sse2.pminu.b" ||
2036 Name
== "sse41.pminuw" ||
2037 Name
== "sse41.pminud" ||
2038 Name
.startswith("avx2.pminu") ||
2039 Name
.startswith("avx512.mask.pminu"))) {
2040 Rep
= upgradeIntMinMax(Builder
, *CI
, ICmpInst::ICMP_ULT
);
2041 } else if (IsX86
&& (Name
== "sse2.pmulu.dq" ||
2042 Name
== "avx2.pmulu.dq" ||
2043 Name
== "avx512.pmulu.dq.512" ||
2044 Name
.startswith("avx512.mask.pmulu.dq."))) {
2045 Rep
= upgradePMULDQ(Builder
, *CI
, /*Signed*/false);
2046 } else if (IsX86
&& (Name
== "sse41.pmuldq" ||
2047 Name
== "avx2.pmul.dq" ||
2048 Name
== "avx512.pmul.dq.512" ||
2049 Name
.startswith("avx512.mask.pmul.dq."))) {
2050 Rep
= upgradePMULDQ(Builder
, *CI
, /*Signed*/true);
2051 } else if (IsX86
&& (Name
== "sse.cvtsi2ss" ||
2052 Name
== "sse2.cvtsi2sd" ||
2053 Name
== "sse.cvtsi642ss" ||
2054 Name
== "sse2.cvtsi642sd")) {
2055 Rep
= Builder
.CreateSIToFP(CI
->getArgOperand(1),
2056 CI
->getType()->getVectorElementType());
2057 Rep
= Builder
.CreateInsertElement(CI
->getArgOperand(0), Rep
, (uint64_t)0);
2058 } else if (IsX86
&& Name
== "avx512.cvtusi2sd") {
2059 Rep
= Builder
.CreateUIToFP(CI
->getArgOperand(1),
2060 CI
->getType()->getVectorElementType());
2061 Rep
= Builder
.CreateInsertElement(CI
->getArgOperand(0), Rep
, (uint64_t)0);
2062 } else if (IsX86
&& Name
== "sse2.cvtss2sd") {
2063 Rep
= Builder
.CreateExtractElement(CI
->getArgOperand(1), (uint64_t)0);
2064 Rep
= Builder
.CreateFPExt(Rep
, CI
->getType()->getVectorElementType());
2065 Rep
= Builder
.CreateInsertElement(CI
->getArgOperand(0), Rep
, (uint64_t)0);
2066 } else if (IsX86
&& (Name
== "sse2.cvtdq2pd" ||
2067 Name
== "sse2.cvtdq2ps" ||
2068 Name
== "avx.cvtdq2.pd.256" ||
2069 Name
== "avx.cvtdq2.ps.256" ||
2070 Name
.startswith("avx512.mask.cvtdq2pd.") ||
2071 Name
.startswith("avx512.mask.cvtudq2pd.") ||
2072 Name
.startswith("avx512.mask.cvtdq2ps.") ||
2073 Name
.startswith("avx512.mask.cvtudq2ps.") ||
2074 Name
.startswith("avx512.mask.cvtqq2pd.") ||
2075 Name
.startswith("avx512.mask.cvtuqq2pd.") ||
2076 Name
== "avx512.mask.cvtqq2ps.256" ||
2077 Name
== "avx512.mask.cvtqq2ps.512" ||
2078 Name
== "avx512.mask.cvtuqq2ps.256" ||
2079 Name
== "avx512.mask.cvtuqq2ps.512" ||
2080 Name
== "sse2.cvtps2pd" ||
2081 Name
== "avx.cvt.ps2.pd.256" ||
2082 Name
== "avx512.mask.cvtps2pd.128" ||
2083 Name
== "avx512.mask.cvtps2pd.256")) {
2084 Type
*DstTy
= CI
->getType();
2085 Rep
= CI
->getArgOperand(0);
2086 Type
*SrcTy
= Rep
->getType();
2088 unsigned NumDstElts
= DstTy
->getVectorNumElements();
2089 if (NumDstElts
< SrcTy
->getVectorNumElements()) {
2090 assert(NumDstElts
== 2 && "Unexpected vector size");
2091 uint32_t ShuffleMask
[2] = { 0, 1 };
2092 Rep
= Builder
.CreateShuffleVector(Rep
, Rep
, ShuffleMask
);
2095 bool IsPS2PD
= SrcTy
->getVectorElementType()->isFloatTy();
2096 bool IsUnsigned
= (StringRef::npos
!= Name
.find("cvtu"));
2098 Rep
= Builder
.CreateFPExt(Rep
, DstTy
, "cvtps2pd");
2099 else if (CI
->getNumArgOperands() == 4 &&
2100 (!isa
<ConstantInt
>(CI
->getArgOperand(3)) ||
2101 cast
<ConstantInt
>(CI
->getArgOperand(3))->getZExtValue() != 4)) {
2102 Intrinsic::ID IID
= IsUnsigned
? Intrinsic::x86_avx512_uitofp_round
2103 : Intrinsic::x86_avx512_sitofp_round
;
2104 Function
*F
= Intrinsic::getDeclaration(CI
->getModule(), IID
,
2106 Rep
= Builder
.CreateCall(F
, { Rep
, CI
->getArgOperand(3) });
2108 Rep
= IsUnsigned
? Builder
.CreateUIToFP(Rep
, DstTy
, "cvt")
2109 : Builder
.CreateSIToFP(Rep
, DstTy
, "cvt");
2112 if (CI
->getNumArgOperands() >= 3)
2113 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(2), Rep
,
2114 CI
->getArgOperand(1));
2115 } else if (IsX86
&& (Name
.startswith("avx512.mask.loadu."))) {
2116 Rep
= UpgradeMaskedLoad(Builder
, CI
->getArgOperand(0),
2117 CI
->getArgOperand(1), CI
->getArgOperand(2),
2119 } else if (IsX86
&& (Name
.startswith("avx512.mask.load."))) {
2120 Rep
= UpgradeMaskedLoad(Builder
, CI
->getArgOperand(0),
2121 CI
->getArgOperand(1),CI
->getArgOperand(2),
2123 } else if (IsX86
&& Name
.startswith("avx512.mask.expand.load.")) {
2124 Type
*ResultTy
= CI
->getType();
2125 Type
*PtrTy
= ResultTy
->getVectorElementType();
2127 // Cast the pointer to element type.
2128 Value
*Ptr
= Builder
.CreateBitCast(CI
->getOperand(0),
2129 llvm::PointerType::getUnqual(PtrTy
));
2131 Value
*MaskVec
= getX86MaskVec(Builder
, CI
->getArgOperand(2),
2132 ResultTy
->getVectorNumElements());
2134 Function
*ELd
= Intrinsic::getDeclaration(F
->getParent(),
2135 Intrinsic::masked_expandload
,
2137 Rep
= Builder
.CreateCall(ELd
, { Ptr
, MaskVec
, CI
->getOperand(1) });
2138 } else if (IsX86
&& Name
.startswith("avx512.mask.compress.store.")) {
2139 Type
*ResultTy
= CI
->getArgOperand(1)->getType();
2140 Type
*PtrTy
= ResultTy
->getVectorElementType();
2142 // Cast the pointer to element type.
2143 Value
*Ptr
= Builder
.CreateBitCast(CI
->getOperand(0),
2144 llvm::PointerType::getUnqual(PtrTy
));
2146 Value
*MaskVec
= getX86MaskVec(Builder
, CI
->getArgOperand(2),
2147 ResultTy
->getVectorNumElements());
2149 Function
*CSt
= Intrinsic::getDeclaration(F
->getParent(),
2150 Intrinsic::masked_compressstore
,
2152 Rep
= Builder
.CreateCall(CSt
, { CI
->getArgOperand(1), Ptr
, MaskVec
});
2153 } else if (IsX86
&& (Name
.startswith("avx512.mask.compress.") ||
2154 Name
.startswith("avx512.mask.expand."))) {
2155 Type
*ResultTy
= CI
->getType();
2157 Value
*MaskVec
= getX86MaskVec(Builder
, CI
->getArgOperand(2),
2158 ResultTy
->getVectorNumElements());
2160 bool IsCompress
= Name
[12] == 'c';
2161 Intrinsic::ID IID
= IsCompress
? Intrinsic::x86_avx512_mask_compress
2162 : Intrinsic::x86_avx512_mask_expand
;
2163 Function
*Intr
= Intrinsic::getDeclaration(F
->getParent(), IID
, ResultTy
);
2164 Rep
= Builder
.CreateCall(Intr
, { CI
->getOperand(0), CI
->getOperand(1),
2166 } else if (IsX86
&& Name
.startswith("xop.vpcom")) {
2168 if (Name
.endswith("ub") || Name
.endswith("uw") || Name
.endswith("ud") ||
2169 Name
.endswith("uq"))
2171 else if (Name
.endswith("b") || Name
.endswith("w") || Name
.endswith("d") ||
2175 llvm_unreachable("Unknown suffix");
2178 if (CI
->getNumArgOperands() == 3) {
2179 Imm
= cast
<ConstantInt
>(CI
->getArgOperand(2))->getZExtValue();
2181 Name
= Name
.substr(9); // strip off "xop.vpcom"
2182 if (Name
.startswith("lt"))
2184 else if (Name
.startswith("le"))
2186 else if (Name
.startswith("gt"))
2188 else if (Name
.startswith("ge"))
2190 else if (Name
.startswith("eq"))
2192 else if (Name
.startswith("ne"))
2194 else if (Name
.startswith("false"))
2196 else if (Name
.startswith("true"))
2199 llvm_unreachable("Unknown condition");
2202 Rep
= upgradeX86vpcom(Builder
, *CI
, Imm
, IsSigned
);
2203 } else if (IsX86
&& Name
.startswith("xop.vpcmov")) {
2204 Value
*Sel
= CI
->getArgOperand(2);
2205 Value
*NotSel
= Builder
.CreateNot(Sel
);
2206 Value
*Sel0
= Builder
.CreateAnd(CI
->getArgOperand(0), Sel
);
2207 Value
*Sel1
= Builder
.CreateAnd(CI
->getArgOperand(1), NotSel
);
2208 Rep
= Builder
.CreateOr(Sel0
, Sel1
);
2209 } else if (IsX86
&& (Name
.startswith("xop.vprot") ||
2210 Name
.startswith("avx512.prol") ||
2211 Name
.startswith("avx512.mask.prol"))) {
2212 Rep
= upgradeX86Rotate(Builder
, *CI
, false);
2213 } else if (IsX86
&& (Name
.startswith("avx512.pror") ||
2214 Name
.startswith("avx512.mask.pror"))) {
2215 Rep
= upgradeX86Rotate(Builder
, *CI
, true);
2216 } else if (IsX86
&& (Name
.startswith("avx512.vpshld.") ||
2217 Name
.startswith("avx512.mask.vpshld") ||
2218 Name
.startswith("avx512.maskz.vpshld"))) {
2219 bool ZeroMask
= Name
[11] == 'z';
2220 Rep
= upgradeX86ConcatShift(Builder
, *CI
, false, ZeroMask
);
2221 } else if (IsX86
&& (Name
.startswith("avx512.vpshrd.") ||
2222 Name
.startswith("avx512.mask.vpshrd") ||
2223 Name
.startswith("avx512.maskz.vpshrd"))) {
2224 bool ZeroMask
= Name
[11] == 'z';
2225 Rep
= upgradeX86ConcatShift(Builder
, *CI
, true, ZeroMask
);
2226 } else if (IsX86
&& Name
== "sse42.crc32.64.8") {
2227 Function
*CRC32
= Intrinsic::getDeclaration(F
->getParent(),
2228 Intrinsic::x86_sse42_crc32_32_8
);
2229 Value
*Trunc0
= Builder
.CreateTrunc(CI
->getArgOperand(0), Type::getInt32Ty(C
));
2230 Rep
= Builder
.CreateCall(CRC32
, {Trunc0
, CI
->getArgOperand(1)});
2231 Rep
= Builder
.CreateZExt(Rep
, CI
->getType(), "");
2232 } else if (IsX86
&& (Name
.startswith("avx.vbroadcast.s") ||
2233 Name
.startswith("avx512.vbroadcast.s"))) {
2234 // Replace broadcasts with a series of insertelements.
2235 Type
*VecTy
= CI
->getType();
2236 Type
*EltTy
= VecTy
->getVectorElementType();
2237 unsigned EltNum
= VecTy
->getVectorNumElements();
2238 Value
*Cast
= Builder
.CreateBitCast(CI
->getArgOperand(0),
2239 EltTy
->getPointerTo());
2240 Value
*Load
= Builder
.CreateLoad(EltTy
, Cast
);
2241 Type
*I32Ty
= Type::getInt32Ty(C
);
2242 Rep
= UndefValue::get(VecTy
);
2243 for (unsigned I
= 0; I
< EltNum
; ++I
)
2244 Rep
= Builder
.CreateInsertElement(Rep
, Load
,
2245 ConstantInt::get(I32Ty
, I
));
2246 } else if (IsX86
&& (Name
.startswith("sse41.pmovsx") ||
2247 Name
.startswith("sse41.pmovzx") ||
2248 Name
.startswith("avx2.pmovsx") ||
2249 Name
.startswith("avx2.pmovzx") ||
2250 Name
.startswith("avx512.mask.pmovsx") ||
2251 Name
.startswith("avx512.mask.pmovzx"))) {
2252 VectorType
*SrcTy
= cast
<VectorType
>(CI
->getArgOperand(0)->getType());
2253 VectorType
*DstTy
= cast
<VectorType
>(CI
->getType());
2254 unsigned NumDstElts
= DstTy
->getNumElements();
2256 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2257 SmallVector
<uint32_t, 8> ShuffleMask(NumDstElts
);
2258 for (unsigned i
= 0; i
!= NumDstElts
; ++i
)
2261 Value
*SV
= Builder
.CreateShuffleVector(
2262 CI
->getArgOperand(0), UndefValue::get(SrcTy
), ShuffleMask
);
2264 bool DoSext
= (StringRef::npos
!= Name
.find("pmovsx"));
2265 Rep
= DoSext
? Builder
.CreateSExt(SV
, DstTy
)
2266 : Builder
.CreateZExt(SV
, DstTy
);
2267 // If there are 3 arguments, it's a masked intrinsic so we need a select.
2268 if (CI
->getNumArgOperands() == 3)
2269 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(2), Rep
,
2270 CI
->getArgOperand(1));
2271 } else if (Name
== "avx512.mask.pmov.qd.256" ||
2272 Name
== "avx512.mask.pmov.qd.512" ||
2273 Name
== "avx512.mask.pmov.wb.256" ||
2274 Name
== "avx512.mask.pmov.wb.512") {
2275 Type
*Ty
= CI
->getArgOperand(1)->getType();
2276 Rep
= Builder
.CreateTrunc(CI
->getArgOperand(0), Ty
);
2277 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(2), Rep
,
2278 CI
->getArgOperand(1));
2279 } else if (IsX86
&& (Name
.startswith("avx.vbroadcastf128") ||
2280 Name
== "avx2.vbroadcasti128")) {
2281 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2282 Type
*EltTy
= CI
->getType()->getVectorElementType();
2283 unsigned NumSrcElts
= 128 / EltTy
->getPrimitiveSizeInBits();
2284 Type
*VT
= VectorType::get(EltTy
, NumSrcElts
);
2285 Value
*Op
= Builder
.CreatePointerCast(CI
->getArgOperand(0),
2286 PointerType::getUnqual(VT
));
2287 Value
*Load
= Builder
.CreateAlignedLoad(VT
, Op
, 1);
2288 if (NumSrcElts
== 2)
2289 Rep
= Builder
.CreateShuffleVector(Load
, UndefValue::get(Load
->getType()),
2292 Rep
= Builder
.CreateShuffleVector(Load
, UndefValue::get(Load
->getType()),
2293 { 0, 1, 2, 3, 0, 1, 2, 3 });
2294 } else if (IsX86
&& (Name
.startswith("avx512.mask.shuf.i") ||
2295 Name
.startswith("avx512.mask.shuf.f"))) {
2296 unsigned Imm
= cast
<ConstantInt
>(CI
->getArgOperand(2))->getZExtValue();
2297 Type
*VT
= CI
->getType();
2298 unsigned NumLanes
= VT
->getPrimitiveSizeInBits() / 128;
2299 unsigned NumElementsInLane
= 128 / VT
->getScalarSizeInBits();
2300 unsigned ControlBitsMask
= NumLanes
- 1;
2301 unsigned NumControlBits
= NumLanes
/ 2;
2302 SmallVector
<uint32_t, 8> ShuffleMask(0);
2304 for (unsigned l
= 0; l
!= NumLanes
; ++l
) {
2305 unsigned LaneMask
= (Imm
>> (l
* NumControlBits
)) & ControlBitsMask
;
2306 // We actually need the other source.
2307 if (l
>= NumLanes
/ 2)
2308 LaneMask
+= NumLanes
;
2309 for (unsigned i
= 0; i
!= NumElementsInLane
; ++i
)
2310 ShuffleMask
.push_back(LaneMask
* NumElementsInLane
+ i
);
2312 Rep
= Builder
.CreateShuffleVector(CI
->getArgOperand(0),
2313 CI
->getArgOperand(1), ShuffleMask
);
2314 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(4), Rep
,
2315 CI
->getArgOperand(3));
2316 }else if (IsX86
&& (Name
.startswith("avx512.mask.broadcastf") ||
2317 Name
.startswith("avx512.mask.broadcasti"))) {
2318 unsigned NumSrcElts
=
2319 CI
->getArgOperand(0)->getType()->getVectorNumElements();
2320 unsigned NumDstElts
= CI
->getType()->getVectorNumElements();
2322 SmallVector
<uint32_t, 8> ShuffleMask(NumDstElts
);
2323 for (unsigned i
= 0; i
!= NumDstElts
; ++i
)
2324 ShuffleMask
[i
] = i
% NumSrcElts
;
2326 Rep
= Builder
.CreateShuffleVector(CI
->getArgOperand(0),
2327 CI
->getArgOperand(0),
2329 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(2), Rep
,
2330 CI
->getArgOperand(1));
2331 } else if (IsX86
&& (Name
.startswith("avx2.pbroadcast") ||
2332 Name
.startswith("avx2.vbroadcast") ||
2333 Name
.startswith("avx512.pbroadcast") ||
2334 Name
.startswith("avx512.mask.broadcast.s"))) {
2335 // Replace vp?broadcasts with a vector shuffle.
2336 Value
*Op
= CI
->getArgOperand(0);
2337 unsigned NumElts
= CI
->getType()->getVectorNumElements();
2338 Type
*MaskTy
= VectorType::get(Type::getInt32Ty(C
), NumElts
);
2339 Rep
= Builder
.CreateShuffleVector(Op
, UndefValue::get(Op
->getType()),
2340 Constant::getNullValue(MaskTy
));
2342 if (CI
->getNumArgOperands() == 3)
2343 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(2), Rep
,
2344 CI
->getArgOperand(1));
2345 } else if (IsX86
&& (Name
.startswith("sse2.padds.") ||
2346 Name
.startswith("sse2.psubs.") ||
2347 Name
.startswith("avx2.padds.") ||
2348 Name
.startswith("avx2.psubs.") ||
2349 Name
.startswith("avx512.padds.") ||
2350 Name
.startswith("avx512.psubs.") ||
2351 Name
.startswith("avx512.mask.padds.") ||
2352 Name
.startswith("avx512.mask.psubs."))) {
2353 bool IsAdd
= Name
.contains(".padds");
2354 Rep
= UpgradeX86AddSubSatIntrinsics(Builder
, *CI
, true, IsAdd
);
2355 } else if (IsX86
&& (Name
.startswith("sse2.paddus.") ||
2356 Name
.startswith("sse2.psubus.") ||
2357 Name
.startswith("avx2.paddus.") ||
2358 Name
.startswith("avx2.psubus.") ||
2359 Name
.startswith("avx512.mask.paddus.") ||
2360 Name
.startswith("avx512.mask.psubus."))) {
2361 bool IsAdd
= Name
.contains(".paddus");
2362 Rep
= UpgradeX86AddSubSatIntrinsics(Builder
, *CI
, false, IsAdd
);
2363 } else if (IsX86
&& Name
.startswith("avx512.mask.palignr.")) {
2364 Rep
= UpgradeX86ALIGNIntrinsics(Builder
, CI
->getArgOperand(0),
2365 CI
->getArgOperand(1),
2366 CI
->getArgOperand(2),
2367 CI
->getArgOperand(3),
2368 CI
->getArgOperand(4),
2370 } else if (IsX86
&& Name
.startswith("avx512.mask.valign.")) {
2371 Rep
= UpgradeX86ALIGNIntrinsics(Builder
, CI
->getArgOperand(0),
2372 CI
->getArgOperand(1),
2373 CI
->getArgOperand(2),
2374 CI
->getArgOperand(3),
2375 CI
->getArgOperand(4),
2377 } else if (IsX86
&& (Name
== "sse2.psll.dq" ||
2378 Name
== "avx2.psll.dq")) {
2379 // 128/256-bit shift left specified in bits.
2380 unsigned Shift
= cast
<ConstantInt
>(CI
->getArgOperand(1))->getZExtValue();
2381 Rep
= UpgradeX86PSLLDQIntrinsics(Builder
, CI
->getArgOperand(0),
2382 Shift
/ 8); // Shift is in bits.
2383 } else if (IsX86
&& (Name
== "sse2.psrl.dq" ||
2384 Name
== "avx2.psrl.dq")) {
2385 // 128/256-bit shift right specified in bits.
2386 unsigned Shift
= cast
<ConstantInt
>(CI
->getArgOperand(1))->getZExtValue();
2387 Rep
= UpgradeX86PSRLDQIntrinsics(Builder
, CI
->getArgOperand(0),
2388 Shift
/ 8); // Shift is in bits.
2389 } else if (IsX86
&& (Name
== "sse2.psll.dq.bs" ||
2390 Name
== "avx2.psll.dq.bs" ||
2391 Name
== "avx512.psll.dq.512")) {
2392 // 128/256/512-bit shift left specified in bytes.
2393 unsigned Shift
= cast
<ConstantInt
>(CI
->getArgOperand(1))->getZExtValue();
2394 Rep
= UpgradeX86PSLLDQIntrinsics(Builder
, CI
->getArgOperand(0), Shift
);
2395 } else if (IsX86
&& (Name
== "sse2.psrl.dq.bs" ||
2396 Name
== "avx2.psrl.dq.bs" ||
2397 Name
== "avx512.psrl.dq.512")) {
2398 // 128/256/512-bit shift right specified in bytes.
2399 unsigned Shift
= cast
<ConstantInt
>(CI
->getArgOperand(1))->getZExtValue();
2400 Rep
= UpgradeX86PSRLDQIntrinsics(Builder
, CI
->getArgOperand(0), Shift
);
2401 } else if (IsX86
&& (Name
== "sse41.pblendw" ||
2402 Name
.startswith("sse41.blendp") ||
2403 Name
.startswith("avx.blend.p") ||
2404 Name
== "avx2.pblendw" ||
2405 Name
.startswith("avx2.pblendd."))) {
2406 Value
*Op0
= CI
->getArgOperand(0);
2407 Value
*Op1
= CI
->getArgOperand(1);
2408 unsigned Imm
= cast
<ConstantInt
>(CI
->getArgOperand(2))->getZExtValue();
2409 VectorType
*VecTy
= cast
<VectorType
>(CI
->getType());
2410 unsigned NumElts
= VecTy
->getNumElements();
2412 SmallVector
<uint32_t, 16> Idxs(NumElts
);
2413 for (unsigned i
= 0; i
!= NumElts
; ++i
)
2414 Idxs
[i
] = ((Imm
>> (i
%8)) & 1) ? i
+ NumElts
: i
;
2416 Rep
= Builder
.CreateShuffleVector(Op0
, Op1
, Idxs
);
2417 } else if (IsX86
&& (Name
.startswith("avx.vinsertf128.") ||
2418 Name
== "avx2.vinserti128" ||
2419 Name
.startswith("avx512.mask.insert"))) {
2420 Value
*Op0
= CI
->getArgOperand(0);
2421 Value
*Op1
= CI
->getArgOperand(1);
2422 unsigned Imm
= cast
<ConstantInt
>(CI
->getArgOperand(2))->getZExtValue();
2423 unsigned DstNumElts
= CI
->getType()->getVectorNumElements();
2424 unsigned SrcNumElts
= Op1
->getType()->getVectorNumElements();
2425 unsigned Scale
= DstNumElts
/ SrcNumElts
;
2427 // Mask off the high bits of the immediate value; hardware ignores those.
2430 // Extend the second operand into a vector the size of the destination.
2431 Value
*UndefV
= UndefValue::get(Op1
->getType());
2432 SmallVector
<uint32_t, 8> Idxs(DstNumElts
);
2433 for (unsigned i
= 0; i
!= SrcNumElts
; ++i
)
2435 for (unsigned i
= SrcNumElts
; i
!= DstNumElts
; ++i
)
2436 Idxs
[i
] = SrcNumElts
;
2437 Rep
= Builder
.CreateShuffleVector(Op1
, UndefV
, Idxs
);
2439 // Insert the second operand into the first operand.
2441 // Note that there is no guarantee that instruction lowering will actually
2442 // produce a vinsertf128 instruction for the created shuffles. In
2443 // particular, the 0 immediate case involves no lane changes, so it can
2444 // be handled as a blend.
2446 // Example of shuffle mask for 32-bit elements:
2447 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2448 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
2450 // First fill with identity mask.
2451 for (unsigned i
= 0; i
!= DstNumElts
; ++i
)
2453 // Then replace the elements where we need to insert.
2454 for (unsigned i
= 0; i
!= SrcNumElts
; ++i
)
2455 Idxs
[i
+ Imm
* SrcNumElts
] = i
+ DstNumElts
;
2456 Rep
= Builder
.CreateShuffleVector(Op0
, Rep
, Idxs
);
2458 // If the intrinsic has a mask operand, handle that.
2459 if (CI
->getNumArgOperands() == 5)
2460 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(4), Rep
,
2461 CI
->getArgOperand(3));
2462 } else if (IsX86
&& (Name
.startswith("avx.vextractf128.") ||
2463 Name
== "avx2.vextracti128" ||
2464 Name
.startswith("avx512.mask.vextract"))) {
2465 Value
*Op0
= CI
->getArgOperand(0);
2466 unsigned Imm
= cast
<ConstantInt
>(CI
->getArgOperand(1))->getZExtValue();
2467 unsigned DstNumElts
= CI
->getType()->getVectorNumElements();
2468 unsigned SrcNumElts
= Op0
->getType()->getVectorNumElements();
2469 unsigned Scale
= SrcNumElts
/ DstNumElts
;
2471 // Mask off the high bits of the immediate value; hardware ignores those.
2474 // Get indexes for the subvector of the input vector.
2475 SmallVector
<uint32_t, 8> Idxs(DstNumElts
);
2476 for (unsigned i
= 0; i
!= DstNumElts
; ++i
) {
2477 Idxs
[i
] = i
+ (Imm
* DstNumElts
);
2479 Rep
= Builder
.CreateShuffleVector(Op0
, Op0
, Idxs
);
2481 // If the intrinsic has a mask operand, handle that.
2482 if (CI
->getNumArgOperands() == 4)
2483 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
,
2484 CI
->getArgOperand(2));
2485 } else if (!IsX86
&& Name
== "stackprotectorcheck") {
2487 } else if (IsX86
&& (Name
.startswith("avx512.mask.perm.df.") ||
2488 Name
.startswith("avx512.mask.perm.di."))) {
2489 Value
*Op0
= CI
->getArgOperand(0);
2490 unsigned Imm
= cast
<ConstantInt
>(CI
->getArgOperand(1))->getZExtValue();
2491 VectorType
*VecTy
= cast
<VectorType
>(CI
->getType());
2492 unsigned NumElts
= VecTy
->getNumElements();
2494 SmallVector
<uint32_t, 8> Idxs(NumElts
);
2495 for (unsigned i
= 0; i
!= NumElts
; ++i
)
2496 Idxs
[i
] = (i
& ~0x3) + ((Imm
>> (2 * (i
& 0x3))) & 3);
2498 Rep
= Builder
.CreateShuffleVector(Op0
, Op0
, Idxs
);
2500 if (CI
->getNumArgOperands() == 4)
2501 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
,
2502 CI
->getArgOperand(2));
2503 } else if (IsX86
&& (Name
.startswith("avx.vperm2f128.") ||
2504 Name
== "avx2.vperm2i128")) {
2505 // The immediate permute control byte looks like this:
2506 // [1:0] - select 128 bits from sources for low half of destination
2508 // [3] - zero low half of destination
2509 // [5:4] - select 128 bits from sources for high half of destination
2511 // [7] - zero high half of destination
2513 uint8_t Imm
= cast
<ConstantInt
>(CI
->getArgOperand(2))->getZExtValue();
2515 unsigned NumElts
= CI
->getType()->getVectorNumElements();
2516 unsigned HalfSize
= NumElts
/ 2;
2517 SmallVector
<uint32_t, 8> ShuffleMask(NumElts
);
2519 // Determine which operand(s) are actually in use for this instruction.
2520 Value
*V0
= (Imm
& 0x02) ? CI
->getArgOperand(1) : CI
->getArgOperand(0);
2521 Value
*V1
= (Imm
& 0x20) ? CI
->getArgOperand(1) : CI
->getArgOperand(0);
2523 // If needed, replace operands based on zero mask.
2524 V0
= (Imm
& 0x08) ? ConstantAggregateZero::get(CI
->getType()) : V0
;
2525 V1
= (Imm
& 0x80) ? ConstantAggregateZero::get(CI
->getType()) : V1
;
2527 // Permute low half of result.
2528 unsigned StartIndex
= (Imm
& 0x01) ? HalfSize
: 0;
2529 for (unsigned i
= 0; i
< HalfSize
; ++i
)
2530 ShuffleMask
[i
] = StartIndex
+ i
;
2532 // Permute high half of result.
2533 StartIndex
= (Imm
& 0x10) ? HalfSize
: 0;
2534 for (unsigned i
= 0; i
< HalfSize
; ++i
)
2535 ShuffleMask
[i
+ HalfSize
] = NumElts
+ StartIndex
+ i
;
2537 Rep
= Builder
.CreateShuffleVector(V0
, V1
, ShuffleMask
);
2539 } else if (IsX86
&& (Name
.startswith("avx.vpermil.") ||
2540 Name
== "sse2.pshuf.d" ||
2541 Name
.startswith("avx512.mask.vpermil.p") ||
2542 Name
.startswith("avx512.mask.pshuf.d."))) {
2543 Value
*Op0
= CI
->getArgOperand(0);
2544 unsigned Imm
= cast
<ConstantInt
>(CI
->getArgOperand(1))->getZExtValue();
2545 VectorType
*VecTy
= cast
<VectorType
>(CI
->getType());
2546 unsigned NumElts
= VecTy
->getNumElements();
2547 // Calculate the size of each index in the immediate.
2548 unsigned IdxSize
= 64 / VecTy
->getScalarSizeInBits();
2549 unsigned IdxMask
= ((1 << IdxSize
) - 1);
2551 SmallVector
<uint32_t, 8> Idxs(NumElts
);
2552 // Lookup the bits for this element, wrapping around the immediate every
2553 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
2554 // to offset by the first index of each group.
2555 for (unsigned i
= 0; i
!= NumElts
; ++i
)
2556 Idxs
[i
] = ((Imm
>> ((i
* IdxSize
) % 8)) & IdxMask
) | (i
& ~IdxMask
);
2558 Rep
= Builder
.CreateShuffleVector(Op0
, Op0
, Idxs
);
2560 if (CI
->getNumArgOperands() == 4)
2561 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
,
2562 CI
->getArgOperand(2));
2563 } else if (IsX86
&& (Name
== "sse2.pshufl.w" ||
2564 Name
.startswith("avx512.mask.pshufl.w."))) {
2565 Value
*Op0
= CI
->getArgOperand(0);
2566 unsigned Imm
= cast
<ConstantInt
>(CI
->getArgOperand(1))->getZExtValue();
2567 unsigned NumElts
= CI
->getType()->getVectorNumElements();
2569 SmallVector
<uint32_t, 16> Idxs(NumElts
);
2570 for (unsigned l
= 0; l
!= NumElts
; l
+= 8) {
2571 for (unsigned i
= 0; i
!= 4; ++i
)
2572 Idxs
[i
+ l
] = ((Imm
>> (2 * i
)) & 0x3) + l
;
2573 for (unsigned i
= 4; i
!= 8; ++i
)
2574 Idxs
[i
+ l
] = i
+ l
;
2577 Rep
= Builder
.CreateShuffleVector(Op0
, Op0
, Idxs
);
2579 if (CI
->getNumArgOperands() == 4)
2580 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
,
2581 CI
->getArgOperand(2));
2582 } else if (IsX86
&& (Name
== "sse2.pshufh.w" ||
2583 Name
.startswith("avx512.mask.pshufh.w."))) {
2584 Value
*Op0
= CI
->getArgOperand(0);
2585 unsigned Imm
= cast
<ConstantInt
>(CI
->getArgOperand(1))->getZExtValue();
2586 unsigned NumElts
= CI
->getType()->getVectorNumElements();
2588 SmallVector
<uint32_t, 16> Idxs(NumElts
);
2589 for (unsigned l
= 0; l
!= NumElts
; l
+= 8) {
2590 for (unsigned i
= 0; i
!= 4; ++i
)
2591 Idxs
[i
+ l
] = i
+ l
;
2592 for (unsigned i
= 0; i
!= 4; ++i
)
2593 Idxs
[i
+ l
+ 4] = ((Imm
>> (2 * i
)) & 0x3) + 4 + l
;
2596 Rep
= Builder
.CreateShuffleVector(Op0
, Op0
, Idxs
);
2598 if (CI
->getNumArgOperands() == 4)
2599 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
,
2600 CI
->getArgOperand(2));
2601 } else if (IsX86
&& Name
.startswith("avx512.mask.shuf.p")) {
2602 Value
*Op0
= CI
->getArgOperand(0);
2603 Value
*Op1
= CI
->getArgOperand(1);
2604 unsigned Imm
= cast
<ConstantInt
>(CI
->getArgOperand(2))->getZExtValue();
2605 unsigned NumElts
= CI
->getType()->getVectorNumElements();
2607 unsigned NumLaneElts
= 128/CI
->getType()->getScalarSizeInBits();
2608 unsigned HalfLaneElts
= NumLaneElts
/ 2;
2610 SmallVector
<uint32_t, 16> Idxs(NumElts
);
2611 for (unsigned i
= 0; i
!= NumElts
; ++i
) {
2612 // Base index is the starting element of the lane.
2613 Idxs
[i
] = i
- (i
% NumLaneElts
);
2614 // If we are half way through the lane switch to the other source.
2615 if ((i
% NumLaneElts
) >= HalfLaneElts
)
2617 // Now select the specific element. By adding HalfLaneElts bits from
2618 // the immediate. Wrapping around the immediate every 8-bits.
2619 Idxs
[i
] += (Imm
>> ((i
* HalfLaneElts
) % 8)) & ((1 << HalfLaneElts
) - 1);
2622 Rep
= Builder
.CreateShuffleVector(Op0
, Op1
, Idxs
);
2624 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(4), Rep
,
2625 CI
->getArgOperand(3));
2626 } else if (IsX86
&& (Name
.startswith("avx512.mask.movddup") ||
2627 Name
.startswith("avx512.mask.movshdup") ||
2628 Name
.startswith("avx512.mask.movsldup"))) {
2629 Value
*Op0
= CI
->getArgOperand(0);
2630 unsigned NumElts
= CI
->getType()->getVectorNumElements();
2631 unsigned NumLaneElts
= 128/CI
->getType()->getScalarSizeInBits();
2633 unsigned Offset
= 0;
2634 if (Name
.startswith("avx512.mask.movshdup."))
2637 SmallVector
<uint32_t, 16> Idxs(NumElts
);
2638 for (unsigned l
= 0; l
!= NumElts
; l
+= NumLaneElts
)
2639 for (unsigned i
= 0; i
!= NumLaneElts
; i
+= 2) {
2640 Idxs
[i
+ l
+ 0] = i
+ l
+ Offset
;
2641 Idxs
[i
+ l
+ 1] = i
+ l
+ Offset
;
2644 Rep
= Builder
.CreateShuffleVector(Op0
, Op0
, Idxs
);
2646 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(2), Rep
,
2647 CI
->getArgOperand(1));
2648 } else if (IsX86
&& (Name
.startswith("avx512.mask.punpckl") ||
2649 Name
.startswith("avx512.mask.unpckl."))) {
2650 Value
*Op0
= CI
->getArgOperand(0);
2651 Value
*Op1
= CI
->getArgOperand(1);
2652 int NumElts
= CI
->getType()->getVectorNumElements();
2653 int NumLaneElts
= 128/CI
->getType()->getScalarSizeInBits();
2655 SmallVector
<uint32_t, 64> Idxs(NumElts
);
2656 for (int l
= 0; l
!= NumElts
; l
+= NumLaneElts
)
2657 for (int i
= 0; i
!= NumLaneElts
; ++i
)
2658 Idxs
[i
+ l
] = l
+ (i
/ 2) + NumElts
* (i
% 2);
2660 Rep
= Builder
.CreateShuffleVector(Op0
, Op1
, Idxs
);
2662 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
,
2663 CI
->getArgOperand(2));
2664 } else if (IsX86
&& (Name
.startswith("avx512.mask.punpckh") ||
2665 Name
.startswith("avx512.mask.unpckh."))) {
2666 Value
*Op0
= CI
->getArgOperand(0);
2667 Value
*Op1
= CI
->getArgOperand(1);
2668 int NumElts
= CI
->getType()->getVectorNumElements();
2669 int NumLaneElts
= 128/CI
->getType()->getScalarSizeInBits();
2671 SmallVector
<uint32_t, 64> Idxs(NumElts
);
2672 for (int l
= 0; l
!= NumElts
; l
+= NumLaneElts
)
2673 for (int i
= 0; i
!= NumLaneElts
; ++i
)
2674 Idxs
[i
+ l
] = (NumLaneElts
/ 2) + l
+ (i
/ 2) + NumElts
* (i
% 2);
2676 Rep
= Builder
.CreateShuffleVector(Op0
, Op1
, Idxs
);
2678 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
,
2679 CI
->getArgOperand(2));
2680 } else if (IsX86
&& (Name
.startswith("avx512.mask.and.") ||
2681 Name
.startswith("avx512.mask.pand."))) {
2682 VectorType
*FTy
= cast
<VectorType
>(CI
->getType());
2683 VectorType
*ITy
= VectorType::getInteger(FTy
);
2684 Rep
= Builder
.CreateAnd(Builder
.CreateBitCast(CI
->getArgOperand(0), ITy
),
2685 Builder
.CreateBitCast(CI
->getArgOperand(1), ITy
));
2686 Rep
= Builder
.CreateBitCast(Rep
, FTy
);
2687 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
,
2688 CI
->getArgOperand(2));
2689 } else if (IsX86
&& (Name
.startswith("avx512.mask.andn.") ||
2690 Name
.startswith("avx512.mask.pandn."))) {
2691 VectorType
*FTy
= cast
<VectorType
>(CI
->getType());
2692 VectorType
*ITy
= VectorType::getInteger(FTy
);
2693 Rep
= Builder
.CreateNot(Builder
.CreateBitCast(CI
->getArgOperand(0), ITy
));
2694 Rep
= Builder
.CreateAnd(Rep
,
2695 Builder
.CreateBitCast(CI
->getArgOperand(1), ITy
));
2696 Rep
= Builder
.CreateBitCast(Rep
, FTy
);
2697 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
,
2698 CI
->getArgOperand(2));
2699 } else if (IsX86
&& (Name
.startswith("avx512.mask.or.") ||
2700 Name
.startswith("avx512.mask.por."))) {
2701 VectorType
*FTy
= cast
<VectorType
>(CI
->getType());
2702 VectorType
*ITy
= VectorType::getInteger(FTy
);
2703 Rep
= Builder
.CreateOr(Builder
.CreateBitCast(CI
->getArgOperand(0), ITy
),
2704 Builder
.CreateBitCast(CI
->getArgOperand(1), ITy
));
2705 Rep
= Builder
.CreateBitCast(Rep
, FTy
);
2706 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
,
2707 CI
->getArgOperand(2));
2708 } else if (IsX86
&& (Name
.startswith("avx512.mask.xor.") ||
2709 Name
.startswith("avx512.mask.pxor."))) {
2710 VectorType
*FTy
= cast
<VectorType
>(CI
->getType());
2711 VectorType
*ITy
= VectorType::getInteger(FTy
);
2712 Rep
= Builder
.CreateXor(Builder
.CreateBitCast(CI
->getArgOperand(0), ITy
),
2713 Builder
.CreateBitCast(CI
->getArgOperand(1), ITy
));
2714 Rep
= Builder
.CreateBitCast(Rep
, FTy
);
2715 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
,
2716 CI
->getArgOperand(2));
2717 } else if (IsX86
&& Name
.startswith("avx512.mask.padd.")) {
2718 Rep
= Builder
.CreateAdd(CI
->getArgOperand(0), CI
->getArgOperand(1));
2719 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
,
2720 CI
->getArgOperand(2));
2721 } else if (IsX86
&& Name
.startswith("avx512.mask.psub.")) {
2722 Rep
= Builder
.CreateSub(CI
->getArgOperand(0), CI
->getArgOperand(1));
2723 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
,
2724 CI
->getArgOperand(2));
2725 } else if (IsX86
&& Name
.startswith("avx512.mask.pmull.")) {
2726 Rep
= Builder
.CreateMul(CI
->getArgOperand(0), CI
->getArgOperand(1));
2727 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
,
2728 CI
->getArgOperand(2));
2729 } else if (IsX86
&& Name
.startswith("avx512.mask.add.p")) {
2730 if (Name
.endswith(".512")) {
2732 if (Name
[17] == 's')
2733 IID
= Intrinsic::x86_avx512_add_ps_512
;
2735 IID
= Intrinsic::x86_avx512_add_pd_512
;
2737 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(), IID
),
2738 { CI
->getArgOperand(0), CI
->getArgOperand(1),
2739 CI
->getArgOperand(4) });
2741 Rep
= Builder
.CreateFAdd(CI
->getArgOperand(0), CI
->getArgOperand(1));
2743 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
,
2744 CI
->getArgOperand(2));
2745 } else if (IsX86
&& Name
.startswith("avx512.mask.div.p")) {
2746 if (Name
.endswith(".512")) {
2748 if (Name
[17] == 's')
2749 IID
= Intrinsic::x86_avx512_div_ps_512
;
2751 IID
= Intrinsic::x86_avx512_div_pd_512
;
2753 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(), IID
),
2754 { CI
->getArgOperand(0), CI
->getArgOperand(1),
2755 CI
->getArgOperand(4) });
2757 Rep
= Builder
.CreateFDiv(CI
->getArgOperand(0), CI
->getArgOperand(1));
2759 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
,
2760 CI
->getArgOperand(2));
2761 } else if (IsX86
&& Name
.startswith("avx512.mask.mul.p")) {
2762 if (Name
.endswith(".512")) {
2764 if (Name
[17] == 's')
2765 IID
= Intrinsic::x86_avx512_mul_ps_512
;
2767 IID
= Intrinsic::x86_avx512_mul_pd_512
;
2769 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(), IID
),
2770 { CI
->getArgOperand(0), CI
->getArgOperand(1),
2771 CI
->getArgOperand(4) });
2773 Rep
= Builder
.CreateFMul(CI
->getArgOperand(0), CI
->getArgOperand(1));
2775 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
,
2776 CI
->getArgOperand(2));
2777 } else if (IsX86
&& Name
.startswith("avx512.mask.sub.p")) {
2778 if (Name
.endswith(".512")) {
2780 if (Name
[17] == 's')
2781 IID
= Intrinsic::x86_avx512_sub_ps_512
;
2783 IID
= Intrinsic::x86_avx512_sub_pd_512
;
2785 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(), IID
),
2786 { CI
->getArgOperand(0), CI
->getArgOperand(1),
2787 CI
->getArgOperand(4) });
2789 Rep
= Builder
.CreateFSub(CI
->getArgOperand(0), CI
->getArgOperand(1));
2791 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
,
2792 CI
->getArgOperand(2));
2793 } else if (IsX86
&& (Name
.startswith("avx512.mask.max.p") ||
2794 Name
.startswith("avx512.mask.min.p")) &&
2795 Name
.drop_front(18) == ".512") {
2796 bool IsDouble
= Name
[17] == 'd';
2797 bool IsMin
= Name
[13] == 'i';
2798 static const Intrinsic::ID MinMaxTbl
[2][2] = {
2799 { Intrinsic::x86_avx512_max_ps_512
, Intrinsic::x86_avx512_max_pd_512
},
2800 { Intrinsic::x86_avx512_min_ps_512
, Intrinsic::x86_avx512_min_pd_512
}
2802 Intrinsic::ID IID
= MinMaxTbl
[IsMin
][IsDouble
];
2804 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(), IID
),
2805 { CI
->getArgOperand(0), CI
->getArgOperand(1),
2806 CI
->getArgOperand(4) });
2807 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
,
2808 CI
->getArgOperand(2));
2809 } else if (IsX86
&& Name
.startswith("avx512.mask.lzcnt.")) {
2810 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(),
2813 { CI
->getArgOperand(0), Builder
.getInt1(false) });
2814 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(2), Rep
,
2815 CI
->getArgOperand(1));
2816 } else if (IsX86
&& Name
.startswith("avx512.mask.psll")) {
2817 bool IsImmediate
= Name
[16] == 'i' ||
2818 (Name
.size() > 18 && Name
[18] == 'i');
2819 bool IsVariable
= Name
[16] == 'v';
2820 char Size
= Name
[16] == '.' ? Name
[17] :
2821 Name
[17] == '.' ? Name
[18] :
2822 Name
[18] == '.' ? Name
[19] :
2826 if (IsVariable
&& Name
[17] != '.') {
2827 if (Size
== 'd' && Name
[17] == '2') // avx512.mask.psllv2.di
2828 IID
= Intrinsic::x86_avx2_psllv_q
;
2829 else if (Size
== 'd' && Name
[17] == '4') // avx512.mask.psllv4.di
2830 IID
= Intrinsic::x86_avx2_psllv_q_256
;
2831 else if (Size
== 's' && Name
[17] == '4') // avx512.mask.psllv4.si
2832 IID
= Intrinsic::x86_avx2_psllv_d
;
2833 else if (Size
== 's' && Name
[17] == '8') // avx512.mask.psllv8.si
2834 IID
= Intrinsic::x86_avx2_psllv_d_256
;
2835 else if (Size
== 'h' && Name
[17] == '8') // avx512.mask.psllv8.hi
2836 IID
= Intrinsic::x86_avx512_psllv_w_128
;
2837 else if (Size
== 'h' && Name
[17] == '1') // avx512.mask.psllv16.hi
2838 IID
= Intrinsic::x86_avx512_psllv_w_256
;
2839 else if (Name
[17] == '3' && Name
[18] == '2') // avx512.mask.psllv32hi
2840 IID
= Intrinsic::x86_avx512_psllv_w_512
;
2842 llvm_unreachable("Unexpected size");
2843 } else if (Name
.endswith(".128")) {
2844 if (Size
== 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
2845 IID
= IsImmediate
? Intrinsic::x86_sse2_pslli_d
2846 : Intrinsic::x86_sse2_psll_d
;
2847 else if (Size
== 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
2848 IID
= IsImmediate
? Intrinsic::x86_sse2_pslli_q
2849 : Intrinsic::x86_sse2_psll_q
;
2850 else if (Size
== 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
2851 IID
= IsImmediate
? Intrinsic::x86_sse2_pslli_w
2852 : Intrinsic::x86_sse2_psll_w
;
2854 llvm_unreachable("Unexpected size");
2855 } else if (Name
.endswith(".256")) {
2856 if (Size
== 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
2857 IID
= IsImmediate
? Intrinsic::x86_avx2_pslli_d
2858 : Intrinsic::x86_avx2_psll_d
;
2859 else if (Size
== 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
2860 IID
= IsImmediate
? Intrinsic::x86_avx2_pslli_q
2861 : Intrinsic::x86_avx2_psll_q
;
2862 else if (Size
== 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
2863 IID
= IsImmediate
? Intrinsic::x86_avx2_pslli_w
2864 : Intrinsic::x86_avx2_psll_w
;
2866 llvm_unreachable("Unexpected size");
2868 if (Size
== 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
2869 IID
= IsImmediate
? Intrinsic::x86_avx512_pslli_d_512
:
2870 IsVariable
? Intrinsic::x86_avx512_psllv_d_512
:
2871 Intrinsic::x86_avx512_psll_d_512
;
2872 else if (Size
== 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
2873 IID
= IsImmediate
? Intrinsic::x86_avx512_pslli_q_512
:
2874 IsVariable
? Intrinsic::x86_avx512_psllv_q_512
:
2875 Intrinsic::x86_avx512_psll_q_512
;
2876 else if (Size
== 'w') // psll.wi.512, pslli.w, psll.w
2877 IID
= IsImmediate
? Intrinsic::x86_avx512_pslli_w_512
2878 : Intrinsic::x86_avx512_psll_w_512
;
2880 llvm_unreachable("Unexpected size");
2883 Rep
= UpgradeX86MaskedShift(Builder
, *CI
, IID
);
2884 } else if (IsX86
&& Name
.startswith("avx512.mask.psrl")) {
2885 bool IsImmediate
= Name
[16] == 'i' ||
2886 (Name
.size() > 18 && Name
[18] == 'i');
2887 bool IsVariable
= Name
[16] == 'v';
2888 char Size
= Name
[16] == '.' ? Name
[17] :
2889 Name
[17] == '.' ? Name
[18] :
2890 Name
[18] == '.' ? Name
[19] :
2894 if (IsVariable
&& Name
[17] != '.') {
2895 if (Size
== 'd' && Name
[17] == '2') // avx512.mask.psrlv2.di
2896 IID
= Intrinsic::x86_avx2_psrlv_q
;
2897 else if (Size
== 'd' && Name
[17] == '4') // avx512.mask.psrlv4.di
2898 IID
= Intrinsic::x86_avx2_psrlv_q_256
;
2899 else if (Size
== 's' && Name
[17] == '4') // avx512.mask.psrlv4.si
2900 IID
= Intrinsic::x86_avx2_psrlv_d
;
2901 else if (Size
== 's' && Name
[17] == '8') // avx512.mask.psrlv8.si
2902 IID
= Intrinsic::x86_avx2_psrlv_d_256
;
2903 else if (Size
== 'h' && Name
[17] == '8') // avx512.mask.psrlv8.hi
2904 IID
= Intrinsic::x86_avx512_psrlv_w_128
;
2905 else if (Size
== 'h' && Name
[17] == '1') // avx512.mask.psrlv16.hi
2906 IID
= Intrinsic::x86_avx512_psrlv_w_256
;
2907 else if (Name
[17] == '3' && Name
[18] == '2') // avx512.mask.psrlv32hi
2908 IID
= Intrinsic::x86_avx512_psrlv_w_512
;
2910 llvm_unreachable("Unexpected size");
2911 } else if (Name
.endswith(".128")) {
2912 if (Size
== 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
2913 IID
= IsImmediate
? Intrinsic::x86_sse2_psrli_d
2914 : Intrinsic::x86_sse2_psrl_d
;
2915 else if (Size
== 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
2916 IID
= IsImmediate
? Intrinsic::x86_sse2_psrli_q
2917 : Intrinsic::x86_sse2_psrl_q
;
2918 else if (Size
== 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
2919 IID
= IsImmediate
? Intrinsic::x86_sse2_psrli_w
2920 : Intrinsic::x86_sse2_psrl_w
;
2922 llvm_unreachable("Unexpected size");
2923 } else if (Name
.endswith(".256")) {
2924 if (Size
== 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
2925 IID
= IsImmediate
? Intrinsic::x86_avx2_psrli_d
2926 : Intrinsic::x86_avx2_psrl_d
;
2927 else if (Size
== 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
2928 IID
= IsImmediate
? Intrinsic::x86_avx2_psrli_q
2929 : Intrinsic::x86_avx2_psrl_q
;
2930 else if (Size
== 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
2931 IID
= IsImmediate
? Intrinsic::x86_avx2_psrli_w
2932 : Intrinsic::x86_avx2_psrl_w
;
2934 llvm_unreachable("Unexpected size");
2936 if (Size
== 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
2937 IID
= IsImmediate
? Intrinsic::x86_avx512_psrli_d_512
:
2938 IsVariable
? Intrinsic::x86_avx512_psrlv_d_512
:
2939 Intrinsic::x86_avx512_psrl_d_512
;
2940 else if (Size
== 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
2941 IID
= IsImmediate
? Intrinsic::x86_avx512_psrli_q_512
:
2942 IsVariable
? Intrinsic::x86_avx512_psrlv_q_512
:
2943 Intrinsic::x86_avx512_psrl_q_512
;
2944 else if (Size
== 'w') // psrl.wi.512, psrli.w, psrl.w)
2945 IID
= IsImmediate
? Intrinsic::x86_avx512_psrli_w_512
2946 : Intrinsic::x86_avx512_psrl_w_512
;
2948 llvm_unreachable("Unexpected size");
2951 Rep
= UpgradeX86MaskedShift(Builder
, *CI
, IID
);
2952 } else if (IsX86
&& Name
.startswith("avx512.mask.psra")) {
2953 bool IsImmediate
= Name
[16] == 'i' ||
2954 (Name
.size() > 18 && Name
[18] == 'i');
2955 bool IsVariable
= Name
[16] == 'v';
2956 char Size
= Name
[16] == '.' ? Name
[17] :
2957 Name
[17] == '.' ? Name
[18] :
2958 Name
[18] == '.' ? Name
[19] :
2962 if (IsVariable
&& Name
[17] != '.') {
2963 if (Size
== 's' && Name
[17] == '4') // avx512.mask.psrav4.si
2964 IID
= Intrinsic::x86_avx2_psrav_d
;
2965 else if (Size
== 's' && Name
[17] == '8') // avx512.mask.psrav8.si
2966 IID
= Intrinsic::x86_avx2_psrav_d_256
;
2967 else if (Size
== 'h' && Name
[17] == '8') // avx512.mask.psrav8.hi
2968 IID
= Intrinsic::x86_avx512_psrav_w_128
;
2969 else if (Size
== 'h' && Name
[17] == '1') // avx512.mask.psrav16.hi
2970 IID
= Intrinsic::x86_avx512_psrav_w_256
;
2971 else if (Name
[17] == '3' && Name
[18] == '2') // avx512.mask.psrav32hi
2972 IID
= Intrinsic::x86_avx512_psrav_w_512
;
2974 llvm_unreachable("Unexpected size");
2975 } else if (Name
.endswith(".128")) {
2976 if (Size
== 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
2977 IID
= IsImmediate
? Intrinsic::x86_sse2_psrai_d
2978 : Intrinsic::x86_sse2_psra_d
;
2979 else if (Size
== 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
2980 IID
= IsImmediate
? Intrinsic::x86_avx512_psrai_q_128
:
2981 IsVariable
? Intrinsic::x86_avx512_psrav_q_128
:
2982 Intrinsic::x86_avx512_psra_q_128
;
2983 else if (Size
== 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
2984 IID
= IsImmediate
? Intrinsic::x86_sse2_psrai_w
2985 : Intrinsic::x86_sse2_psra_w
;
2987 llvm_unreachable("Unexpected size");
2988 } else if (Name
.endswith(".256")) {
2989 if (Size
== 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
2990 IID
= IsImmediate
? Intrinsic::x86_avx2_psrai_d
2991 : Intrinsic::x86_avx2_psra_d
;
2992 else if (Size
== 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
2993 IID
= IsImmediate
? Intrinsic::x86_avx512_psrai_q_256
:
2994 IsVariable
? Intrinsic::x86_avx512_psrav_q_256
:
2995 Intrinsic::x86_avx512_psra_q_256
;
2996 else if (Size
== 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
2997 IID
= IsImmediate
? Intrinsic::x86_avx2_psrai_w
2998 : Intrinsic::x86_avx2_psra_w
;
3000 llvm_unreachable("Unexpected size");
3002 if (Size
== 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3003 IID
= IsImmediate
? Intrinsic::x86_avx512_psrai_d_512
:
3004 IsVariable
? Intrinsic::x86_avx512_psrav_d_512
:
3005 Intrinsic::x86_avx512_psra_d_512
;
3006 else if (Size
== 'q') // psra.qi.512, psrai.q, psra.q
3007 IID
= IsImmediate
? Intrinsic::x86_avx512_psrai_q_512
:
3008 IsVariable
? Intrinsic::x86_avx512_psrav_q_512
:
3009 Intrinsic::x86_avx512_psra_q_512
;
3010 else if (Size
== 'w') // psra.wi.512, psrai.w, psra.w
3011 IID
= IsImmediate
? Intrinsic::x86_avx512_psrai_w_512
3012 : Intrinsic::x86_avx512_psra_w_512
;
3014 llvm_unreachable("Unexpected size");
3017 Rep
= UpgradeX86MaskedShift(Builder
, *CI
, IID
);
3018 } else if (IsX86
&& Name
.startswith("avx512.mask.move.s")) {
3019 Rep
= upgradeMaskedMove(Builder
, *CI
);
3020 } else if (IsX86
&& Name
.startswith("avx512.cvtmask2")) {
3021 Rep
= UpgradeMaskToInt(Builder
, *CI
);
3022 } else if (IsX86
&& Name
.endswith(".movntdqa")) {
3023 Module
*M
= F
->getParent();
3024 MDNode
*Node
= MDNode::get(
3025 C
, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C
), 1)));
3027 Value
*Ptr
= CI
->getArgOperand(0);
3028 VectorType
*VTy
= cast
<VectorType
>(CI
->getType());
3030 // Convert the type of the pointer to a pointer to the stored type.
3032 Builder
.CreateBitCast(Ptr
, PointerType::getUnqual(VTy
), "cast");
3033 LoadInst
*LI
= Builder
.CreateAlignedLoad(VTy
, BC
, VTy
->getBitWidth() / 8);
3034 LI
->setMetadata(M
->getMDKindID("nontemporal"), Node
);
3036 } else if (IsX86
&& (Name
.startswith("fma.vfmadd.") ||
3037 Name
.startswith("fma.vfmsub.") ||
3038 Name
.startswith("fma.vfnmadd.") ||
3039 Name
.startswith("fma.vfnmsub."))) {
3040 bool NegMul
= Name
[6] == 'n';
3041 bool NegAcc
= NegMul
? Name
[8] == 's' : Name
[7] == 's';
3042 bool IsScalar
= NegMul
? Name
[12] == 's' : Name
[11] == 's';
3044 Value
*Ops
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3045 CI
->getArgOperand(2) };
3048 Ops
[0] = Builder
.CreateExtractElement(Ops
[0], (uint64_t)0);
3049 Ops
[1] = Builder
.CreateExtractElement(Ops
[1], (uint64_t)0);
3050 Ops
[2] = Builder
.CreateExtractElement(Ops
[2], (uint64_t)0);
3053 if (NegMul
&& !IsScalar
)
3054 Ops
[0] = Builder
.CreateFNeg(Ops
[0]);
3055 if (NegMul
&& IsScalar
)
3056 Ops
[1] = Builder
.CreateFNeg(Ops
[1]);
3058 Ops
[2] = Builder
.CreateFNeg(Ops
[2]);
3060 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(),
3066 Rep
= Builder
.CreateInsertElement(CI
->getArgOperand(0), Rep
,
3068 } else if (IsX86
&& Name
.startswith("fma4.vfmadd.s")) {
3069 Value
*Ops
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3070 CI
->getArgOperand(2) };
3072 Ops
[0] = Builder
.CreateExtractElement(Ops
[0], (uint64_t)0);
3073 Ops
[1] = Builder
.CreateExtractElement(Ops
[1], (uint64_t)0);
3074 Ops
[2] = Builder
.CreateExtractElement(Ops
[2], (uint64_t)0);
3076 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(),
3081 Rep
= Builder
.CreateInsertElement(Constant::getNullValue(CI
->getType()),
3083 } else if (IsX86
&& (Name
.startswith("avx512.mask.vfmadd.s") ||
3084 Name
.startswith("avx512.maskz.vfmadd.s") ||
3085 Name
.startswith("avx512.mask3.vfmadd.s") ||
3086 Name
.startswith("avx512.mask3.vfmsub.s") ||
3087 Name
.startswith("avx512.mask3.vfnmsub.s"))) {
3088 bool IsMask3
= Name
[11] == '3';
3089 bool IsMaskZ
= Name
[11] == 'z';
3090 // Drop the "avx512.mask." to make it easier.
3091 Name
= Name
.drop_front(IsMask3
|| IsMaskZ
? 13 : 12);
3092 bool NegMul
= Name
[2] == 'n';
3093 bool NegAcc
= NegMul
? Name
[4] == 's' : Name
[3] == 's';
3095 Value
*A
= CI
->getArgOperand(0);
3096 Value
*B
= CI
->getArgOperand(1);
3097 Value
*C
= CI
->getArgOperand(2);
3099 if (NegMul
&& (IsMask3
|| IsMaskZ
))
3100 A
= Builder
.CreateFNeg(A
);
3101 if (NegMul
&& !(IsMask3
|| IsMaskZ
))
3102 B
= Builder
.CreateFNeg(B
);
3104 C
= Builder
.CreateFNeg(C
);
3106 A
= Builder
.CreateExtractElement(A
, (uint64_t)0);
3107 B
= Builder
.CreateExtractElement(B
, (uint64_t)0);
3108 C
= Builder
.CreateExtractElement(C
, (uint64_t)0);
3110 if (!isa
<ConstantInt
>(CI
->getArgOperand(4)) ||
3111 cast
<ConstantInt
>(CI
->getArgOperand(4))->getZExtValue() != 4) {
3112 Value
*Ops
[] = { A
, B
, C
, CI
->getArgOperand(4) };
3115 if (Name
.back() == 'd')
3116 IID
= Intrinsic::x86_avx512_vfmadd_f64
;
3118 IID
= Intrinsic::x86_avx512_vfmadd_f32
;
3119 Function
*FMA
= Intrinsic::getDeclaration(CI
->getModule(), IID
);
3120 Rep
= Builder
.CreateCall(FMA
, Ops
);
3122 Function
*FMA
= Intrinsic::getDeclaration(CI
->getModule(),
3125 Rep
= Builder
.CreateCall(FMA
, { A
, B
, C
});
3128 Value
*PassThru
= IsMaskZ
? Constant::getNullValue(Rep
->getType()) :
3131 // For Mask3 with NegAcc, we need to create a new extractelement that
3132 // avoids the negation above.
3133 if (NegAcc
&& IsMask3
)
3134 PassThru
= Builder
.CreateExtractElement(CI
->getArgOperand(2),
3137 Rep
= EmitX86ScalarSelect(Builder
, CI
->getArgOperand(3),
3139 Rep
= Builder
.CreateInsertElement(CI
->getArgOperand(IsMask3
? 2 : 0),
3141 } else if (IsX86
&& (Name
.startswith("avx512.mask.vfmadd.p") ||
3142 Name
.startswith("avx512.mask.vfnmadd.p") ||
3143 Name
.startswith("avx512.mask.vfnmsub.p") ||
3144 Name
.startswith("avx512.mask3.vfmadd.p") ||
3145 Name
.startswith("avx512.mask3.vfmsub.p") ||
3146 Name
.startswith("avx512.mask3.vfnmsub.p") ||
3147 Name
.startswith("avx512.maskz.vfmadd.p"))) {
3148 bool IsMask3
= Name
[11] == '3';
3149 bool IsMaskZ
= Name
[11] == 'z';
3150 // Drop the "avx512.mask." to make it easier.
3151 Name
= Name
.drop_front(IsMask3
|| IsMaskZ
? 13 : 12);
3152 bool NegMul
= Name
[2] == 'n';
3153 bool NegAcc
= NegMul
? Name
[4] == 's' : Name
[3] == 's';
3155 Value
*A
= CI
->getArgOperand(0);
3156 Value
*B
= CI
->getArgOperand(1);
3157 Value
*C
= CI
->getArgOperand(2);
3159 if (NegMul
&& (IsMask3
|| IsMaskZ
))
3160 A
= Builder
.CreateFNeg(A
);
3161 if (NegMul
&& !(IsMask3
|| IsMaskZ
))
3162 B
= Builder
.CreateFNeg(B
);
3164 C
= Builder
.CreateFNeg(C
);
3166 if (CI
->getNumArgOperands() == 5 &&
3167 (!isa
<ConstantInt
>(CI
->getArgOperand(4)) ||
3168 cast
<ConstantInt
>(CI
->getArgOperand(4))->getZExtValue() != 4)) {
3170 // Check the character before ".512" in string.
3171 if (Name
[Name
.size()-5] == 's')
3172 IID
= Intrinsic::x86_avx512_vfmadd_ps_512
;
3174 IID
= Intrinsic::x86_avx512_vfmadd_pd_512
;
3176 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(), IID
),
3177 { A
, B
, C
, CI
->getArgOperand(4) });
3179 Function
*FMA
= Intrinsic::getDeclaration(CI
->getModule(),
3182 Rep
= Builder
.CreateCall(FMA
, { A
, B
, C
});
3185 Value
*PassThru
= IsMaskZ
? llvm::Constant::getNullValue(CI
->getType()) :
3186 IsMask3
? CI
->getArgOperand(2) :
3187 CI
->getArgOperand(0);
3189 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
, PassThru
);
3190 } else if (IsX86
&& (Name
.startswith("fma.vfmaddsub.p") ||
3191 Name
.startswith("fma.vfmsubadd.p"))) {
3192 bool IsSubAdd
= Name
[7] == 's';
3193 int NumElts
= CI
->getType()->getVectorNumElements();
3195 Value
*Ops
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3196 CI
->getArgOperand(2) };
3198 Function
*FMA
= Intrinsic::getDeclaration(CI
->getModule(), Intrinsic::fma
,
3200 Value
*Odd
= Builder
.CreateCall(FMA
, Ops
);
3201 Ops
[2] = Builder
.CreateFNeg(Ops
[2]);
3202 Value
*Even
= Builder
.CreateCall(FMA
, Ops
);
3205 std::swap(Even
, Odd
);
3207 SmallVector
<uint32_t, 32> Idxs(NumElts
);
3208 for (int i
= 0; i
!= NumElts
; ++i
)
3209 Idxs
[i
] = i
+ (i
% 2) * NumElts
;
3211 Rep
= Builder
.CreateShuffleVector(Even
, Odd
, Idxs
);
3212 } else if (IsX86
&& (Name
.startswith("avx512.mask.vfmaddsub.p") ||
3213 Name
.startswith("avx512.mask3.vfmaddsub.p") ||
3214 Name
.startswith("avx512.maskz.vfmaddsub.p") ||
3215 Name
.startswith("avx512.mask3.vfmsubadd.p"))) {
3216 bool IsMask3
= Name
[11] == '3';
3217 bool IsMaskZ
= Name
[11] == 'z';
3218 // Drop the "avx512.mask." to make it easier.
3219 Name
= Name
.drop_front(IsMask3
|| IsMaskZ
? 13 : 12);
3220 bool IsSubAdd
= Name
[3] == 's';
3221 if (CI
->getNumArgOperands() == 5 &&
3222 (!isa
<ConstantInt
>(CI
->getArgOperand(4)) ||
3223 cast
<ConstantInt
>(CI
->getArgOperand(4))->getZExtValue() != 4)) {
3225 // Check the character before ".512" in string.
3226 if (Name
[Name
.size()-5] == 's')
3227 IID
= Intrinsic::x86_avx512_vfmaddsub_ps_512
;
3229 IID
= Intrinsic::x86_avx512_vfmaddsub_pd_512
;
3231 Value
*Ops
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3232 CI
->getArgOperand(2), CI
->getArgOperand(4) };
3234 Ops
[2] = Builder
.CreateFNeg(Ops
[2]);
3236 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(F
->getParent(), IID
),
3237 {CI
->getArgOperand(0), CI
->getArgOperand(1),
3238 CI
->getArgOperand(2), CI
->getArgOperand(4)});
3240 int NumElts
= CI
->getType()->getVectorNumElements();
3242 Value
*Ops
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3243 CI
->getArgOperand(2) };
3245 Function
*FMA
= Intrinsic::getDeclaration(CI
->getModule(), Intrinsic::fma
,
3247 Value
*Odd
= Builder
.CreateCall(FMA
, Ops
);
3248 Ops
[2] = Builder
.CreateFNeg(Ops
[2]);
3249 Value
*Even
= Builder
.CreateCall(FMA
, Ops
);
3252 std::swap(Even
, Odd
);
3254 SmallVector
<uint32_t, 32> Idxs(NumElts
);
3255 for (int i
= 0; i
!= NumElts
; ++i
)
3256 Idxs
[i
] = i
+ (i
% 2) * NumElts
;
3258 Rep
= Builder
.CreateShuffleVector(Even
, Odd
, Idxs
);
3261 Value
*PassThru
= IsMaskZ
? llvm::Constant::getNullValue(CI
->getType()) :
3262 IsMask3
? CI
->getArgOperand(2) :
3263 CI
->getArgOperand(0);
3265 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
, PassThru
);
3266 } else if (IsX86
&& (Name
.startswith("avx512.mask.pternlog.") ||
3267 Name
.startswith("avx512.maskz.pternlog."))) {
3268 bool ZeroMask
= Name
[11] == 'z';
3269 unsigned VecWidth
= CI
->getType()->getPrimitiveSizeInBits();
3270 unsigned EltWidth
= CI
->getType()->getScalarSizeInBits();
3272 if (VecWidth
== 128 && EltWidth
== 32)
3273 IID
= Intrinsic::x86_avx512_pternlog_d_128
;
3274 else if (VecWidth
== 256 && EltWidth
== 32)
3275 IID
= Intrinsic::x86_avx512_pternlog_d_256
;
3276 else if (VecWidth
== 512 && EltWidth
== 32)
3277 IID
= Intrinsic::x86_avx512_pternlog_d_512
;
3278 else if (VecWidth
== 128 && EltWidth
== 64)
3279 IID
= Intrinsic::x86_avx512_pternlog_q_128
;
3280 else if (VecWidth
== 256 && EltWidth
== 64)
3281 IID
= Intrinsic::x86_avx512_pternlog_q_256
;
3282 else if (VecWidth
== 512 && EltWidth
== 64)
3283 IID
= Intrinsic::x86_avx512_pternlog_q_512
;
3285 llvm_unreachable("Unexpected intrinsic");
3287 Value
*Args
[] = { CI
->getArgOperand(0) , CI
->getArgOperand(1),
3288 CI
->getArgOperand(2), CI
->getArgOperand(3) };
3289 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(), IID
),
3291 Value
*PassThru
= ZeroMask
? ConstantAggregateZero::get(CI
->getType())
3292 : CI
->getArgOperand(0);
3293 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(4), Rep
, PassThru
);
3294 } else if (IsX86
&& (Name
.startswith("avx512.mask.vpmadd52") ||
3295 Name
.startswith("avx512.maskz.vpmadd52"))) {
3296 bool ZeroMask
= Name
[11] == 'z';
3297 bool High
= Name
[20] == 'h' || Name
[21] == 'h';
3298 unsigned VecWidth
= CI
->getType()->getPrimitiveSizeInBits();
3300 if (VecWidth
== 128 && !High
)
3301 IID
= Intrinsic::x86_avx512_vpmadd52l_uq_128
;
3302 else if (VecWidth
== 256 && !High
)
3303 IID
= Intrinsic::x86_avx512_vpmadd52l_uq_256
;
3304 else if (VecWidth
== 512 && !High
)
3305 IID
= Intrinsic::x86_avx512_vpmadd52l_uq_512
;
3306 else if (VecWidth
== 128 && High
)
3307 IID
= Intrinsic::x86_avx512_vpmadd52h_uq_128
;
3308 else if (VecWidth
== 256 && High
)
3309 IID
= Intrinsic::x86_avx512_vpmadd52h_uq_256
;
3310 else if (VecWidth
== 512 && High
)
3311 IID
= Intrinsic::x86_avx512_vpmadd52h_uq_512
;
3313 llvm_unreachable("Unexpected intrinsic");
3315 Value
*Args
[] = { CI
->getArgOperand(0) , CI
->getArgOperand(1),
3316 CI
->getArgOperand(2) };
3317 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(), IID
),
3319 Value
*PassThru
= ZeroMask
? ConstantAggregateZero::get(CI
->getType())
3320 : CI
->getArgOperand(0);
3321 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
, PassThru
);
3322 } else if (IsX86
&& (Name
.startswith("avx512.mask.vpermi2var.") ||
3323 Name
.startswith("avx512.mask.vpermt2var.") ||
3324 Name
.startswith("avx512.maskz.vpermt2var."))) {
3325 bool ZeroMask
= Name
[11] == 'z';
3326 bool IndexForm
= Name
[17] == 'i';
3327 Rep
= UpgradeX86VPERMT2Intrinsics(Builder
, *CI
, ZeroMask
, IndexForm
);
3328 } else if (IsX86
&& (Name
.startswith("avx512.mask.vpdpbusd.") ||
3329 Name
.startswith("avx512.maskz.vpdpbusd.") ||
3330 Name
.startswith("avx512.mask.vpdpbusds.") ||
3331 Name
.startswith("avx512.maskz.vpdpbusds."))) {
3332 bool ZeroMask
= Name
[11] == 'z';
3333 bool IsSaturating
= Name
[ZeroMask
? 21 : 20] == 's';
3334 unsigned VecWidth
= CI
->getType()->getPrimitiveSizeInBits();
3336 if (VecWidth
== 128 && !IsSaturating
)
3337 IID
= Intrinsic::x86_avx512_vpdpbusd_128
;
3338 else if (VecWidth
== 256 && !IsSaturating
)
3339 IID
= Intrinsic::x86_avx512_vpdpbusd_256
;
3340 else if (VecWidth
== 512 && !IsSaturating
)
3341 IID
= Intrinsic::x86_avx512_vpdpbusd_512
;
3342 else if (VecWidth
== 128 && IsSaturating
)
3343 IID
= Intrinsic::x86_avx512_vpdpbusds_128
;
3344 else if (VecWidth
== 256 && IsSaturating
)
3345 IID
= Intrinsic::x86_avx512_vpdpbusds_256
;
3346 else if (VecWidth
== 512 && IsSaturating
)
3347 IID
= Intrinsic::x86_avx512_vpdpbusds_512
;
3349 llvm_unreachable("Unexpected intrinsic");
3351 Value
*Args
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3352 CI
->getArgOperand(2) };
3353 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(), IID
),
3355 Value
*PassThru
= ZeroMask
? ConstantAggregateZero::get(CI
->getType())
3356 : CI
->getArgOperand(0);
3357 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
, PassThru
);
3358 } else if (IsX86
&& (Name
.startswith("avx512.mask.vpdpwssd.") ||
3359 Name
.startswith("avx512.maskz.vpdpwssd.") ||
3360 Name
.startswith("avx512.mask.vpdpwssds.") ||
3361 Name
.startswith("avx512.maskz.vpdpwssds."))) {
3362 bool ZeroMask
= Name
[11] == 'z';
3363 bool IsSaturating
= Name
[ZeroMask
? 21 : 20] == 's';
3364 unsigned VecWidth
= CI
->getType()->getPrimitiveSizeInBits();
3366 if (VecWidth
== 128 && !IsSaturating
)
3367 IID
= Intrinsic::x86_avx512_vpdpwssd_128
;
3368 else if (VecWidth
== 256 && !IsSaturating
)
3369 IID
= Intrinsic::x86_avx512_vpdpwssd_256
;
3370 else if (VecWidth
== 512 && !IsSaturating
)
3371 IID
= Intrinsic::x86_avx512_vpdpwssd_512
;
3372 else if (VecWidth
== 128 && IsSaturating
)
3373 IID
= Intrinsic::x86_avx512_vpdpwssds_128
;
3374 else if (VecWidth
== 256 && IsSaturating
)
3375 IID
= Intrinsic::x86_avx512_vpdpwssds_256
;
3376 else if (VecWidth
== 512 && IsSaturating
)
3377 IID
= Intrinsic::x86_avx512_vpdpwssds_512
;
3379 llvm_unreachable("Unexpected intrinsic");
3381 Value
*Args
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3382 CI
->getArgOperand(2) };
3383 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(CI
->getModule(), IID
),
3385 Value
*PassThru
= ZeroMask
? ConstantAggregateZero::get(CI
->getType())
3386 : CI
->getArgOperand(0);
3387 Rep
= EmitX86Select(Builder
, CI
->getArgOperand(3), Rep
, PassThru
);
3388 } else if (IsX86
&& (Name
== "addcarryx.u32" || Name
== "addcarryx.u64" ||
3389 Name
== "addcarry.u32" || Name
== "addcarry.u64" ||
3390 Name
== "subborrow.u32" || Name
== "subborrow.u64")) {
3392 if (Name
[0] == 'a' && Name
.back() == '2')
3393 IID
= Intrinsic::x86_addcarry_32
;
3394 else if (Name
[0] == 'a' && Name
.back() == '4')
3395 IID
= Intrinsic::x86_addcarry_64
;
3396 else if (Name
[0] == 's' && Name
.back() == '2')
3397 IID
= Intrinsic::x86_subborrow_32
;
3398 else if (Name
[0] == 's' && Name
.back() == '4')
3399 IID
= Intrinsic::x86_subborrow_64
;
3401 llvm_unreachable("Unexpected intrinsic");
3403 // Make a call with 3 operands.
3404 Value
*Args
[] = { CI
->getArgOperand(0), CI
->getArgOperand(1),
3405 CI
->getArgOperand(2)};
3406 Value
*NewCall
= Builder
.CreateCall(
3407 Intrinsic::getDeclaration(CI
->getModule(), IID
),
3410 // Extract the second result and store it.
3411 Value
*Data
= Builder
.CreateExtractValue(NewCall
, 1);
3412 // Cast the pointer to the right type.
3413 Value
*Ptr
= Builder
.CreateBitCast(CI
->getArgOperand(3),
3414 llvm::PointerType::getUnqual(Data
->getType()));
3415 Builder
.CreateAlignedStore(Data
, Ptr
, 1);
3416 // Replace the original call result with the first result of the new call.
3417 Value
*CF
= Builder
.CreateExtractValue(NewCall
, 0);
3419 CI
->replaceAllUsesWith(CF
);
3421 } else if (IsX86
&& Name
.startswith("avx512.mask.") &&
3422 upgradeAVX512MaskToSelect(Name
, Builder
, *CI
, Rep
)) {
3423 // Rep will be updated by the call in the condition.
3424 } else if (IsNVVM
&& (Name
== "abs.i" || Name
== "abs.ll")) {
3425 Value
*Arg
= CI
->getArgOperand(0);
3426 Value
*Neg
= Builder
.CreateNeg(Arg
, "neg");
3427 Value
*Cmp
= Builder
.CreateICmpSGE(
3428 Arg
, llvm::Constant::getNullValue(Arg
->getType()), "abs.cond");
3429 Rep
= Builder
.CreateSelect(Cmp
, Arg
, Neg
, "abs");
3430 } else if (IsNVVM
&& (Name
.startswith("atomic.load.add.f32.p") ||
3431 Name
.startswith("atomic.load.add.f64.p"))) {
3432 Value
*Ptr
= CI
->getArgOperand(0);
3433 Value
*Val
= CI
->getArgOperand(1);
3434 Rep
= Builder
.CreateAtomicRMW(AtomicRMWInst::FAdd
, Ptr
, Val
,
3435 AtomicOrdering::SequentiallyConsistent
);
3436 } else if (IsNVVM
&& (Name
== "max.i" || Name
== "max.ll" ||
3437 Name
== "max.ui" || Name
== "max.ull")) {
3438 Value
*Arg0
= CI
->getArgOperand(0);
3439 Value
*Arg1
= CI
->getArgOperand(1);
3440 Value
*Cmp
= Name
.endswith(".ui") || Name
.endswith(".ull")
3441 ? Builder
.CreateICmpUGE(Arg0
, Arg1
, "max.cond")
3442 : Builder
.CreateICmpSGE(Arg0
, Arg1
, "max.cond");
3443 Rep
= Builder
.CreateSelect(Cmp
, Arg0
, Arg1
, "max");
3444 } else if (IsNVVM
&& (Name
== "min.i" || Name
== "min.ll" ||
3445 Name
== "min.ui" || Name
== "min.ull")) {
3446 Value
*Arg0
= CI
->getArgOperand(0);
3447 Value
*Arg1
= CI
->getArgOperand(1);
3448 Value
*Cmp
= Name
.endswith(".ui") || Name
.endswith(".ull")
3449 ? Builder
.CreateICmpULE(Arg0
, Arg1
, "min.cond")
3450 : Builder
.CreateICmpSLE(Arg0
, Arg1
, "min.cond");
3451 Rep
= Builder
.CreateSelect(Cmp
, Arg0
, Arg1
, "min");
3452 } else if (IsNVVM
&& Name
== "clz.ll") {
3453 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 and returns an i64.
3454 Value
*Arg
= CI
->getArgOperand(0);
3455 Value
*Ctlz
= Builder
.CreateCall(
3456 Intrinsic::getDeclaration(F
->getParent(), Intrinsic::ctlz
,
3458 {Arg
, Builder
.getFalse()}, "ctlz");
3459 Rep
= Builder
.CreateTrunc(Ctlz
, Builder
.getInt32Ty(), "ctlz.trunc");
3460 } else if (IsNVVM
&& Name
== "popc.ll") {
3461 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 and returns an
3463 Value
*Arg
= CI
->getArgOperand(0);
3464 Value
*Popc
= Builder
.CreateCall(
3465 Intrinsic::getDeclaration(F
->getParent(), Intrinsic::ctpop
,
3468 Rep
= Builder
.CreateTrunc(Popc
, Builder
.getInt32Ty(), "ctpop.trunc");
3469 } else if (IsNVVM
&& Name
== "h2f") {
3470 Rep
= Builder
.CreateCall(Intrinsic::getDeclaration(
3471 F
->getParent(), Intrinsic::convert_from_fp16
,
3472 {Builder
.getFloatTy()}),
3473 CI
->getArgOperand(0), "h2f");
3475 llvm_unreachable("Unknown function for CallInst upgrade.");
3479 CI
->replaceAllUsesWith(Rep
);
3480 CI
->eraseFromParent();
3484 const auto &DefaultCase
= [&NewFn
, &CI
]() -> void {
3485 // Handle generic mangling change, but nothing else
3487 (CI
->getCalledFunction()->getName() != NewFn
->getName()) &&
3488 "Unknown function for CallInst upgrade and isn't just a name change");
3489 CI
->setCalledFunction(NewFn
);
3491 CallInst
*NewCall
= nullptr;
3492 switch (NewFn
->getIntrinsicID()) {
3497 case Intrinsic::experimental_vector_reduce_v2_fmul
: {
3498 SmallVector
<Value
*, 2> Args
;
3500 Args
.push_back(ConstantFP::get(CI
->getOperand(0)->getType(), 1.0));
3502 Args
.push_back(CI
->getOperand(0));
3503 Args
.push_back(CI
->getOperand(1));
3504 NewCall
= Builder
.CreateCall(NewFn
, Args
);
3505 cast
<Instruction
>(NewCall
)->copyFastMathFlags(CI
);
3508 case Intrinsic::experimental_vector_reduce_v2_fadd
: {
3509 SmallVector
<Value
*, 2> Args
;
3511 Args
.push_back(Constant::getNullValue(CI
->getOperand(0)->getType()));
3513 Args
.push_back(CI
->getOperand(0));
3514 Args
.push_back(CI
->getOperand(1));
3515 NewCall
= Builder
.CreateCall(NewFn
, Args
);
3516 cast
<Instruction
>(NewCall
)->copyFastMathFlags(CI
);
3519 case Intrinsic::arm_neon_vld1
:
3520 case Intrinsic::arm_neon_vld2
:
3521 case Intrinsic::arm_neon_vld3
:
3522 case Intrinsic::arm_neon_vld4
:
3523 case Intrinsic::arm_neon_vld2lane
:
3524 case Intrinsic::arm_neon_vld3lane
:
3525 case Intrinsic::arm_neon_vld4lane
:
3526 case Intrinsic::arm_neon_vst1
:
3527 case Intrinsic::arm_neon_vst2
:
3528 case Intrinsic::arm_neon_vst3
:
3529 case Intrinsic::arm_neon_vst4
:
3530 case Intrinsic::arm_neon_vst2lane
:
3531 case Intrinsic::arm_neon_vst3lane
:
3532 case Intrinsic::arm_neon_vst4lane
: {
3533 SmallVector
<Value
*, 4> Args(CI
->arg_operands().begin(),
3534 CI
->arg_operands().end());
3535 NewCall
= Builder
.CreateCall(NewFn
, Args
);
3539 case Intrinsic::bitreverse
:
3540 NewCall
= Builder
.CreateCall(NewFn
, {CI
->getArgOperand(0)});
3543 case Intrinsic::ctlz
:
3544 case Intrinsic::cttz
:
3545 assert(CI
->getNumArgOperands() == 1 &&
3546 "Mismatch between function args and call args");
3548 Builder
.CreateCall(NewFn
, {CI
->getArgOperand(0), Builder
.getFalse()});
3551 case Intrinsic::objectsize
: {
3552 Value
*NullIsUnknownSize
= CI
->getNumArgOperands() == 2
3553 ? Builder
.getFalse()
3554 : CI
->getArgOperand(2);
3556 CI
->getNumArgOperands() < 4 ? Builder
.getFalse() : CI
->getArgOperand(3);
3557 NewCall
= Builder
.CreateCall(
3558 NewFn
, {CI
->getArgOperand(0), CI
->getArgOperand(1), NullIsUnknownSize
, Dynamic
});
3562 case Intrinsic::ctpop
:
3563 NewCall
= Builder
.CreateCall(NewFn
, {CI
->getArgOperand(0)});
3566 case Intrinsic::convert_from_fp16
:
3567 NewCall
= Builder
.CreateCall(NewFn
, {CI
->getArgOperand(0)});
3570 case Intrinsic::dbg_value
:
3571 // Upgrade from the old version that had an extra offset argument.
3572 assert(CI
->getNumArgOperands() == 4);
3573 // Drop nonzero offsets instead of attempting to upgrade them.
3574 if (auto *Offset
= dyn_cast_or_null
<Constant
>(CI
->getArgOperand(1)))
3575 if (Offset
->isZeroValue()) {
3576 NewCall
= Builder
.CreateCall(
3578 {CI
->getArgOperand(0), CI
->getArgOperand(2), CI
->getArgOperand(3)});
3581 CI
->eraseFromParent();
3584 case Intrinsic::x86_xop_vfrcz_ss
:
3585 case Intrinsic::x86_xop_vfrcz_sd
:
3586 NewCall
= Builder
.CreateCall(NewFn
, {CI
->getArgOperand(1)});
3589 case Intrinsic::x86_xop_vpermil2pd
:
3590 case Intrinsic::x86_xop_vpermil2ps
:
3591 case Intrinsic::x86_xop_vpermil2pd_256
:
3592 case Intrinsic::x86_xop_vpermil2ps_256
: {
3593 SmallVector
<Value
*, 4> Args(CI
->arg_operands().begin(),
3594 CI
->arg_operands().end());
3595 VectorType
*FltIdxTy
= cast
<VectorType
>(Args
[2]->getType());
3596 VectorType
*IntIdxTy
= VectorType::getInteger(FltIdxTy
);
3597 Args
[2] = Builder
.CreateBitCast(Args
[2], IntIdxTy
);
3598 NewCall
= Builder
.CreateCall(NewFn
, Args
);
3602 case Intrinsic::x86_sse41_ptestc
:
3603 case Intrinsic::x86_sse41_ptestz
:
3604 case Intrinsic::x86_sse41_ptestnzc
: {
3605 // The arguments for these intrinsics used to be v4f32, and changed
3606 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3607 // So, the only thing required is a bitcast for both arguments.
3608 // First, check the arguments have the old type.
3609 Value
*Arg0
= CI
->getArgOperand(0);
3610 if (Arg0
->getType() != VectorType::get(Type::getFloatTy(C
), 4))
3613 // Old intrinsic, add bitcasts
3614 Value
*Arg1
= CI
->getArgOperand(1);
3616 Type
*NewVecTy
= VectorType::get(Type::getInt64Ty(C
), 2);
3618 Value
*BC0
= Builder
.CreateBitCast(Arg0
, NewVecTy
, "cast");
3619 Value
*BC1
= Builder
.CreateBitCast(Arg1
, NewVecTy
, "cast");
3621 NewCall
= Builder
.CreateCall(NewFn
, {BC0
, BC1
});
3625 case Intrinsic::x86_rdtscp
: {
3626 // This used to take 1 arguments. If we have no arguments, it is already
3628 if (CI
->getNumOperands() == 0)
3631 NewCall
= Builder
.CreateCall(NewFn
);
3632 // Extract the second result and store it.
3633 Value
*Data
= Builder
.CreateExtractValue(NewCall
, 1);
3634 // Cast the pointer to the right type.
3635 Value
*Ptr
= Builder
.CreateBitCast(CI
->getArgOperand(0),
3636 llvm::PointerType::getUnqual(Data
->getType()));
3637 Builder
.CreateAlignedStore(Data
, Ptr
, 1);
3638 // Replace the original call result with the first result of the new call.
3639 Value
*TSC
= Builder
.CreateExtractValue(NewCall
, 0);
3641 std::string Name
= CI
->getName();
3642 if (!Name
.empty()) {
3643 CI
->setName(Name
+ ".old");
3644 NewCall
->setName(Name
);
3646 CI
->replaceAllUsesWith(TSC
);
3647 CI
->eraseFromParent();
3651 case Intrinsic::x86_sse41_insertps
:
3652 case Intrinsic::x86_sse41_dppd
:
3653 case Intrinsic::x86_sse41_dpps
:
3654 case Intrinsic::x86_sse41_mpsadbw
:
3655 case Intrinsic::x86_avx_dp_ps_256
:
3656 case Intrinsic::x86_avx2_mpsadbw
: {
3657 // Need to truncate the last argument from i32 to i8 -- this argument models
3658 // an inherently 8-bit immediate operand to these x86 instructions.
3659 SmallVector
<Value
*, 4> Args(CI
->arg_operands().begin(),
3660 CI
->arg_operands().end());
3662 // Replace the last argument with a trunc.
3663 Args
.back() = Builder
.CreateTrunc(Args
.back(), Type::getInt8Ty(C
), "trunc");
3664 NewCall
= Builder
.CreateCall(NewFn
, Args
);
3668 case Intrinsic::thread_pointer
: {
3669 NewCall
= Builder
.CreateCall(NewFn
, {});
3673 case Intrinsic::invariant_start
:
3674 case Intrinsic::invariant_end
:
3675 case Intrinsic::masked_load
:
3676 case Intrinsic::masked_store
:
3677 case Intrinsic::masked_gather
:
3678 case Intrinsic::masked_scatter
: {
3679 SmallVector
<Value
*, 4> Args(CI
->arg_operands().begin(),
3680 CI
->arg_operands().end());
3681 NewCall
= Builder
.CreateCall(NewFn
, Args
);
3685 case Intrinsic::memcpy
:
3686 case Intrinsic::memmove
:
3687 case Intrinsic::memset
: {
3688 // We have to make sure that the call signature is what we're expecting.
3689 // We only want to change the old signatures by removing the alignment arg:
3690 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
3691 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
3692 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3693 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
3694 // Note: i8*'s in the above can be any pointer type
3695 if (CI
->getNumArgOperands() != 5) {
3699 // Remove alignment argument (3), and add alignment attributes to the
3700 // dest/src pointers.
3701 Value
*Args
[4] = {CI
->getArgOperand(0), CI
->getArgOperand(1),
3702 CI
->getArgOperand(2), CI
->getArgOperand(4)};
3703 NewCall
= Builder
.CreateCall(NewFn
, Args
);
3704 auto *MemCI
= cast
<MemIntrinsic
>(NewCall
);
3705 // All mem intrinsics support dest alignment.
3706 const ConstantInt
*Align
= cast
<ConstantInt
>(CI
->getArgOperand(3));
3707 MemCI
->setDestAlignment(Align
->getZExtValue());
3708 // Memcpy/Memmove also support source alignment.
3709 if (auto *MTI
= dyn_cast
<MemTransferInst
>(MemCI
))
3710 MTI
->setSourceAlignment(Align
->getZExtValue());
3714 assert(NewCall
&& "Should have either set this variable or returned through "
3715 "the default case");
3716 std::string Name
= CI
->getName();
3717 if (!Name
.empty()) {
3718 CI
->setName(Name
+ ".old");
3719 NewCall
->setName(Name
);
3721 CI
->replaceAllUsesWith(NewCall
);
3722 CI
->eraseFromParent();
3725 void llvm::UpgradeCallsToIntrinsic(Function
*F
) {
3726 assert(F
&& "Illegal attempt to upgrade a non-existent intrinsic.");
3728 // Check if this function should be upgraded and get the replacement function
3731 if (UpgradeIntrinsicFunction(F
, NewFn
)) {
3732 // Replace all users of the old function with the new function or new
3733 // instructions. This is not a range loop because the call is deleted.
3734 for (auto UI
= F
->user_begin(), UE
= F
->user_end(); UI
!= UE
; )
3735 if (CallInst
*CI
= dyn_cast
<CallInst
>(*UI
++))
3736 UpgradeIntrinsicCall(CI
, NewFn
);
3738 // Remove old function, no longer used, from the module.
3739 F
->eraseFromParent();
3743 MDNode
*llvm::UpgradeTBAANode(MDNode
&MD
) {
3744 // Check if the tag uses struct-path aware TBAA format.
3745 if (isa
<MDNode
>(MD
.getOperand(0)) && MD
.getNumOperands() >= 3)
3748 auto &Context
= MD
.getContext();
3749 if (MD
.getNumOperands() == 3) {
3750 Metadata
*Elts
[] = {MD
.getOperand(0), MD
.getOperand(1)};
3751 MDNode
*ScalarType
= MDNode::get(Context
, Elts
);
3752 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
3753 Metadata
*Elts2
[] = {ScalarType
, ScalarType
,
3754 ConstantAsMetadata::get(
3755 Constant::getNullValue(Type::getInt64Ty(Context
))),
3757 return MDNode::get(Context
, Elts2
);
3759 // Create a MDNode <MD, MD, offset 0>
3760 Metadata
*Elts
[] = {&MD
, &MD
, ConstantAsMetadata::get(Constant::getNullValue(
3761 Type::getInt64Ty(Context
)))};
3762 return MDNode::get(Context
, Elts
);
3765 Instruction
*llvm::UpgradeBitCastInst(unsigned Opc
, Value
*V
, Type
*DestTy
,
3766 Instruction
*&Temp
) {
3767 if (Opc
!= Instruction::BitCast
)
3771 Type
*SrcTy
= V
->getType();
3772 if (SrcTy
->isPtrOrPtrVectorTy() && DestTy
->isPtrOrPtrVectorTy() &&
3773 SrcTy
->getPointerAddressSpace() != DestTy
->getPointerAddressSpace()) {
3774 LLVMContext
&Context
= V
->getContext();
3776 // We have no information about target data layout, so we assume that
3777 // the maximum pointer size is 64bit.
3778 Type
*MidTy
= Type::getInt64Ty(Context
);
3779 Temp
= CastInst::Create(Instruction::PtrToInt
, V
, MidTy
);
3781 return CastInst::Create(Instruction::IntToPtr
, Temp
, DestTy
);
3787 Value
*llvm::UpgradeBitCastExpr(unsigned Opc
, Constant
*C
, Type
*DestTy
) {
3788 if (Opc
!= Instruction::BitCast
)
3791 Type
*SrcTy
= C
->getType();
3792 if (SrcTy
->isPtrOrPtrVectorTy() && DestTy
->isPtrOrPtrVectorTy() &&
3793 SrcTy
->getPointerAddressSpace() != DestTy
->getPointerAddressSpace()) {
3794 LLVMContext
&Context
= C
->getContext();
3796 // We have no information about target data layout, so we assume that
3797 // the maximum pointer size is 64bit.
3798 Type
*MidTy
= Type::getInt64Ty(Context
);
3800 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C
, MidTy
),
3807 /// Check the debug info version number, if it is out-dated, drop the debug
3808 /// info. Return true if module is modified.
3809 bool llvm::UpgradeDebugInfo(Module
&M
) {
3810 unsigned Version
= getDebugMetadataVersionFromModule(M
);
3811 if (Version
== DEBUG_METADATA_VERSION
) {
3812 bool BrokenDebugInfo
= false;
3813 if (verifyModule(M
, &llvm::errs(), &BrokenDebugInfo
))
3814 report_fatal_error("Broken module found, compilation aborted!");
3815 if (!BrokenDebugInfo
)
3816 // Everything is ok.
3819 // Diagnose malformed debug info.
3820 DiagnosticInfoIgnoringInvalidDebugMetadata
Diag(M
);
3821 M
.getContext().diagnose(Diag
);
3824 bool Modified
= StripDebugInfo(M
);
3825 if (Modified
&& Version
!= DEBUG_METADATA_VERSION
) {
3826 // Diagnose a version mismatch.
3827 DiagnosticInfoDebugMetadataVersion
DiagVersion(M
, Version
);
3828 M
.getContext().diagnose(DiagVersion
);
3833 /// This checks for objc retain release marker which should be upgraded. It
3834 /// returns true if module is modified.
3835 static bool UpgradeRetainReleaseMarker(Module
&M
) {
3836 bool Changed
= false;
3837 const char *MarkerKey
= "clang.arc.retainAutoreleasedReturnValueMarker";
3838 NamedMDNode
*ModRetainReleaseMarker
= M
.getNamedMetadata(MarkerKey
);
3839 if (ModRetainReleaseMarker
) {
3840 MDNode
*Op
= ModRetainReleaseMarker
->getOperand(0);
3842 MDString
*ID
= dyn_cast_or_null
<MDString
>(Op
->getOperand(0));
3844 SmallVector
<StringRef
, 4> ValueComp
;
3845 ID
->getString().split(ValueComp
, "#");
3846 if (ValueComp
.size() == 2) {
3847 std::string NewValue
= ValueComp
[0].str() + ";" + ValueComp
[1].str();
3848 ID
= MDString::get(M
.getContext(), NewValue
);
3850 M
.addModuleFlag(Module::Error
, MarkerKey
, ID
);
3851 M
.eraseNamedMetadata(ModRetainReleaseMarker
);
3859 void llvm::UpgradeARCRuntime(Module
&M
) {
3860 // This lambda converts normal function calls to ARC runtime functions to
3862 auto UpgradeToIntrinsic
= [&](const char *OldFunc
,
3863 llvm::Intrinsic::ID IntrinsicFunc
) {
3864 Function
*Fn
= M
.getFunction(OldFunc
);
3869 Function
*NewFn
= llvm::Intrinsic::getDeclaration(&M
, IntrinsicFunc
);
3871 for (auto I
= Fn
->user_begin(), E
= Fn
->user_end(); I
!= E
;) {
3872 CallInst
*CI
= dyn_cast
<CallInst
>(*I
++);
3873 if (!CI
|| CI
->getCalledFunction() != Fn
)
3876 IRBuilder
<> Builder(CI
->getParent(), CI
->getIterator());
3877 FunctionType
*NewFuncTy
= NewFn
->getFunctionType();
3878 SmallVector
<Value
*, 2> Args
;
3880 for (unsigned I
= 0, E
= CI
->getNumArgOperands(); I
!= E
; ++I
) {
3881 Value
*Arg
= CI
->getArgOperand(I
);
3882 // Bitcast argument to the parameter type of the new function if it's
3883 // not a variadic argument.
3884 if (I
< NewFuncTy
->getNumParams())
3885 Arg
= Builder
.CreateBitCast(Arg
, NewFuncTy
->getParamType(I
));
3886 Args
.push_back(Arg
);
3889 // Create a call instruction that calls the new function.
3890 CallInst
*NewCall
= Builder
.CreateCall(NewFuncTy
, NewFn
, Args
);
3891 NewCall
->setTailCallKind(cast
<CallInst
>(CI
)->getTailCallKind());
3892 NewCall
->setName(CI
->getName());
3894 // Bitcast the return value back to the type of the old call.
3895 Value
*NewRetVal
= Builder
.CreateBitCast(NewCall
, CI
->getType());
3897 if (!CI
->use_empty())
3898 CI
->replaceAllUsesWith(NewRetVal
);
3899 CI
->eraseFromParent();
3902 if (Fn
->use_empty())
3903 Fn
->eraseFromParent();
3906 // Unconditionally convert a call to "clang.arc.use" to a call to
3907 // "llvm.objc.clang.arc.use".
3908 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use
);
3910 // Upgrade the retain release marker. If there is no need to upgrade
3911 // the marker, that means either the module is already new enough to contain
3912 // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
3913 if (!UpgradeRetainReleaseMarker(M
))
3916 std::pair
<const char *, llvm::Intrinsic::ID
> RuntimeFuncs
[] = {
3917 {"objc_autorelease", llvm::Intrinsic::objc_autorelease
},
3918 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop
},
3919 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush
},
3920 {"objc_autoreleaseReturnValue",
3921 llvm::Intrinsic::objc_autoreleaseReturnValue
},
3922 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak
},
3923 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak
},
3924 {"objc_initWeak", llvm::Intrinsic::objc_initWeak
},
3925 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak
},
3926 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained
},
3927 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak
},
3928 {"objc_release", llvm::Intrinsic::objc_release
},
3929 {"objc_retain", llvm::Intrinsic::objc_retain
},
3930 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease
},
3931 {"objc_retainAutoreleaseReturnValue",
3932 llvm::Intrinsic::objc_retainAutoreleaseReturnValue
},
3933 {"objc_retainAutoreleasedReturnValue",
3934 llvm::Intrinsic::objc_retainAutoreleasedReturnValue
},
3935 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock
},
3936 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong
},
3937 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak
},
3938 {"objc_unsafeClaimAutoreleasedReturnValue",
3939 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue
},
3940 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject
},
3941 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject
},
3942 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer
},
3943 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease
},
3944 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter
},
3945 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit
},
3946 {"objc_arc_annotation_topdown_bbstart",
3947 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart
},
3948 {"objc_arc_annotation_topdown_bbend",
3949 llvm::Intrinsic::objc_arc_annotation_topdown_bbend
},
3950 {"objc_arc_annotation_bottomup_bbstart",
3951 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart
},
3952 {"objc_arc_annotation_bottomup_bbend",
3953 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend
}};
3955 for (auto &I
: RuntimeFuncs
)
3956 UpgradeToIntrinsic(I
.first
, I
.second
);
// Upgrades the module-level flags metadata of M in place. The visible code:
//  * rewrites "PIC Level" / "PIE Level" flags whose behavior is Module::Error
//    so that their behavior operand becomes Module::Max,
//  * rebuilds the string value of "Objective-C Image Info Section" with its
//    spaces removed,
//  * calls M.addModuleFlag for "Objective-C Class Properties" when
//    HasObjCFlag is set and no such flag was seen in the loop.
// NOTE(review): this listing is lossy -- the embedded line numbers skip (e.g.
// 3961-3963, 3968, 3970-3971, 3973, 3986, 3988-3991), so several statements,
// the closing braces and the final return are not visible here. Presumably
// the function returns Changed; confirm against the full file.
3959 bool llvm::UpgradeModuleFlags(Module
&M
) {
3960 NamedMDNode
*ModFlags
= M
.getModuleFlagsMetadata();
// NOTE(review): lines 3961-3963 are missing; presumably a guard for a null
// ModFlags, since it is dereferenced unconditionally below -- confirm.
3964 bool HasObjCFlag
= false, HasClassProperties
= false, Changed
= false;
// Index-based loop: entries are replaced through setOperand(I, ...) below.
3965 for (unsigned I
= 0, E
= ModFlags
->getNumOperands(); I
!= E
; ++I
) {
3966 MDNode
*Op
= ModFlags
->getOperand(I
);
// Only three-operand entries are examined; operand 0 is read as the
// behavior, operand 1 as the flag name and operand 2 as the value below.
3967 if (Op
->getNumOperands() != 3)
3969 MDString
*ID
= dyn_cast_or_null
<MDString
>(Op
->getOperand(1));
// NOTE(review): lines 3970-3971 are missing; presumably entries whose name is
// not an MDString are skipped, since ID is dereferenced next -- confirm.
3972 if (ID
->getString() == "Objective-C Image Info Version")
// NOTE(review): line 3973 (the body of the check above) is missing;
// presumably it sets HasObjCFlag -- confirm.
3974 if (ID
->getString() == "Objective-C Class Properties")
3975 HasClassProperties
= true;
3976 // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
3977 // fields was Error and is now Max.
3978 if (ID
->getString() == "PIC Level" || ID
->getString() == "PIE Level") {
3979 if (auto *Behavior
=
3980 mdconst::dyn_extract_or_null
<ConstantInt
>(Op
->getOperand(0))) {
3981 if (Behavior
->getLimitedValue() == Module::Error
) {
3982 Type
*Int32Ty
= Type::getInt32Ty(M
.getContext());
// Replacement entry: new behavior (Max) followed by the same flag name.
// NOTE(review): line 3986 (the third initializer) is missing; presumably the
// original value operand is carried over -- confirm.
3983 Metadata
*Ops
[3] = {
3984 ConstantAsMetadata::get(ConstantInt::get(Int32Ty
, Module::Max
)),
3985 MDString::get(M
.getContext(), ID
->getString()),
3987 ModFlags
->setOperand(I
, MDNode::get(M
.getContext(), Ops
));
3992 // Upgrade Objective-C Image Info Section. Remove the whitespace in the
3993 // section name so that llvm-lto will not complain about mismatching
3994 // module flags that are functionally the same.
3995 if (ID
->getString() == "Objective-C Image Info Section") {
3996 if (auto *Value
= dyn_cast_or_null
<MDString
>(Op
->getOperand(2))) {
3997 SmallVector
<StringRef
, 4> ValueComp
;
3998 Value
->getString().split(ValueComp
, " ");
// More than one piece means the value contained at least one space.
3999 if (ValueComp
.size() != 1) {
// Concatenate the pieces back together without the separating spaces.
4000 std::string NewValue
;
4001 for (auto &S
: ValueComp
)
4002 NewValue
+= S
.str();
// Same behavior and name operands, new space-free value string.
4003 Metadata
*Ops
[3] = {Op
->getOperand(0), Op
->getOperand(1),
4004 MDString::get(M
.getContext(), NewValue
)};
4005 ModFlags
->setOperand(I
, MDNode::get(M
.getContext(), Ops
));
4012 // "Objective-C Class Properties" is recently added for Objective-C. We
4013 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
4014 // flag of value 0, so we can correctly downgrade this flag when trying to
4015 // link an ObjC bitcode without this module flag with an ObjC bitcode with
4016 // this module flag.
4017 if (HasObjCFlag
&& !HasClassProperties
) {
// NOTE(review): the value argument of this call is on a missing line; the
// comment above says the flag value is 0 -- confirm against the full file.
4018 M
.addModuleFlag(llvm::Module::Override
, "Objective-C Class Properties",
// Rewrites the section name of every global in M whose section starts with
// "__DATA, __objc_catlist" so that the whitespace around each comma-separated
// component is trimmed.
// NOTE(review): this listing is lossy -- the embedded line numbers skip (e.g.
// 4042-4043, 4047-4048), so the bodies of the two guards in the loop and the
// closing braces are not visible; presumably the guards skip the current
// global -- confirm against the full file.
4026 void llvm::UpgradeSectionAttributes(Module
&M
) {
// TrimSpaces: split Section on ',', trim each component, and join the
// components back together with ',' and no surrounding whitespace.
4027 auto TrimSpaces
= [](StringRef Section
) -> std::string
{
4028 SmallVector
<StringRef
, 5> Components
;
4029 Section
.split(Components
, ',');
4031 SmallString
<32> Buffer
;
4032 raw_svector_ostream
OS(Buffer
);
// Every component is written with a leading ',' (including the first one).
4034 for (auto Component
: Components
)
4035 OS
<< ',' << Component
.trim();
// substr(1) drops the ',' that was emitted before the first component.
4037 return OS
.str().substr(1);
4040 for (auto &GV
: M
.globals()) {
4041 if (!GV
.hasSection())
4044 StringRef Section
= GV
.getSection();
// Only sections spelled with the old "__DATA, __objc_catlist" prefix (note
// the space after the comma) are rewritten.
4046 if (!Section
.startswith("__DATA, __objc_catlist"))
4049 // Before: __DATA, __objc_catlist, regular, no_dead_strip
4050 // After:  __DATA,__objc_catlist,regular,no_dead_strip
4051 GV
.setSection(TrimSpaces(Section
));
// Returns whether MD is an MDTuple whose first operand is an MDString that
// starts with the old "llvm.vectorizer." prefix.
// NOTE(review): this listing is lossy -- lines 4057-4058, 4060 and 4062-4063
// are missing; presumably they return false when a cast yields null or the
// tuple is empty, since T and S are dereferenced afterwards -- confirm.
4055 static bool isOldLoopArgument(Metadata
*MD
) {
4056 auto *T
= dyn_cast_or_null
<MDTuple
>(MD
);
4059 if (T
->getNumOperands() < 1)
// The tag of a loop argument is read from operand 0.
4061 auto *S
= dyn_cast_or_null
<MDString
>(T
->getOperand(0));
4064 return S
->getString().startswith("llvm.vectorizer.");
// Maps an old "llvm.vectorizer.*" tag to an MDString in context C:
//  * "llvm.vectorizer.unroll" becomes "llvm.loop.interleave.count",
//  * any other tag has its "llvm.vectorizer." prefix replaced with
//    "llvm.loop.vectorize.".
// OldTag must start with the old prefix (asserted below).
// NOTE(review): the tail of the final return expression (lines 4076-4077) is
// missing from this listing; presumably it materialises the Twine as a
// string -- confirm against the full file.
4067 static MDString
*upgradeLoopTag(LLVMContext
&C
, StringRef OldTag
) {
4068 StringRef OldPrefix
= "llvm.vectorizer.";
4069 assert(OldTag
.startswith(OldPrefix
) && "Expected old prefix");
// Special case: this tag gets a new name that does not follow the generic
// prefix substitution below.
4071 if (OldTag
== "llvm.vectorizer.unroll")
4072 return MDString::get(C
, "llvm.loop.interleave.count");
// Generic case: keep the suffix after the old prefix, swap in the new prefix.
4074 return MDString::get(
4075 C
, (Twine("llvm.loop.vectorize.") + OldTag
.drop_front(OldPrefix
.size()))
// Upgrades a single loop-metadata argument. When MD is an MDTuple whose
// first operand is an MDString starting with "llvm.vectorizer.", returns a
// new MDTuple with that tag passed through upgradeLoopTag and all remaining
// operands copied unchanged.
// NOTE(review): this listing is lossy -- lines 4081-4082, 4084, 4086-4087 and
// 4089-4090 are missing; presumably they return MD untouched when it is not
// such a tuple, since T and OldTag are dereferenced afterwards -- confirm.
4079 static Metadata
*upgradeLoopArgument(Metadata
*MD
) {
4080 auto *T
= dyn_cast_or_null
<MDTuple
>(MD
);
4083 if (T
->getNumOperands() < 1)
4085 auto *OldTag
= dyn_cast_or_null
<MDString
>(T
->getOperand(0));
4088 if (!OldTag
->getString().startswith("llvm.vectorizer."))
4091 // This has an old tag. Upgrade it.
4092 SmallVector
<Metadata
*, 8> Ops
;
4093 Ops
.reserve(T
->getNumOperands());
// Operand 0 is replaced by the upgraded tag ...
4094 Ops
.push_back(upgradeLoopTag(T
->getContext(), OldTag
->getString()));
// ... and operands 1..E-1 are carried over as they are.
4095 for (unsigned I
= 1, E
= T
->getNumOperands(); I
!= E
; ++I
)
4096 Ops
.push_back(T
->getOperand(I
));
4098 return MDTuple::get(T
->getContext(), Ops
);
// Upgrades a loop-metadata attachment N. When at least one operand of the
// tuple satisfies isOldLoopArgument, returns a new MDTuple in which every
// operand has been passed through upgradeLoopArgument.
// NOTE(review): this listing is lossy -- lines 4103-4105 and 4107-4108 are
// missing; presumably they return &N unchanged when N is not an MDTuple or
// when no operand carries an old tag -- confirm against the full file.
4101 MDNode
*llvm::upgradeInstructionLoopAttachment(MDNode
&N
) {
4102 auto *T
= dyn_cast
<MDTuple
>(&N
);
// Check first whether any operand needs upgrading at all.
4106 if (none_of(T
->operands(), isOldLoopArgument
))
4109 SmallVector
<Metadata
*, 8> Ops
;
4110 Ops
.reserve(T
->getNumOperands());
// Rebuild the operand list, upgrading each argument individually.
4111 for (Metadata
*MD
: T
->operands())
4112 Ops
.push_back(upgradeLoopArgument(MD
));
4114 return MDTuple::get(T
->getContext(), Ops
);