1 //=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements the WebAssemblyTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "WebAssemblyISelLowering.h"
15 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
16 #include "Utils/WebAssemblyTypeUtilities.h"
17 #include "Utils/WebAssemblyUtilities.h"
18 #include "WebAssemblyMachineFunctionInfo.h"
19 #include "WebAssemblySubtarget.h"
20 #include "WebAssemblyTargetMachine.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineJumpTableInfo.h"
24 #include "llvm/CodeGen/MachineModuleInfo.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/SelectionDAG.h"
27 #include "llvm/CodeGen/SelectionDAGNodes.h"
28 #include "llvm/IR/DiagnosticInfo.h"
29 #include "llvm/IR/DiagnosticPrinter.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/IntrinsicsWebAssembly.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/KnownBits.h"
36 #include "llvm/Support/MathExtras.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include "llvm/Target/TargetOptions.h"
41 #define DEBUG_TYPE "wasm-lower"
43 WebAssemblyTargetLowering::WebAssemblyTargetLowering(
44 const TargetMachine
&TM
, const WebAssemblySubtarget
&STI
)
45 : TargetLowering(TM
), Subtarget(&STI
) {
46 auto MVTPtr
= Subtarget
->hasAddr64() ? MVT::i64
: MVT::i32
;
48 // Booleans always contain 0 or 1.
49 setBooleanContents(ZeroOrOneBooleanContent
);
50 // Except in SIMD vectors
51 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent
);
52 // We don't know the microarchitecture here, so just reduce register pressure.
53 setSchedulingPreference(Sched::RegPressure
);
54 // Tell ISel that we have a stack pointer.
55 setStackPointerRegisterToSaveRestore(
56 Subtarget
->hasAddr64() ? WebAssembly::SP64
: WebAssembly::SP32
);
57 // Set up the register classes.
58 addRegisterClass(MVT::i32
, &WebAssembly::I32RegClass
);
59 addRegisterClass(MVT::i64
, &WebAssembly::I64RegClass
);
60 addRegisterClass(MVT::f32
, &WebAssembly::F32RegClass
);
61 addRegisterClass(MVT::f64
, &WebAssembly::F64RegClass
);
62 if (Subtarget
->hasSIMD128()) {
63 addRegisterClass(MVT::v16i8
, &WebAssembly::V128RegClass
);
64 addRegisterClass(MVT::v8i16
, &WebAssembly::V128RegClass
);
65 addRegisterClass(MVT::v4i32
, &WebAssembly::V128RegClass
);
66 addRegisterClass(MVT::v4f32
, &WebAssembly::V128RegClass
);
67 addRegisterClass(MVT::v2i64
, &WebAssembly::V128RegClass
);
68 addRegisterClass(MVT::v2f64
, &WebAssembly::V128RegClass
);
70 if (Subtarget
->hasReferenceTypes()) {
71 addRegisterClass(MVT::externref
, &WebAssembly::EXTERNREFRegClass
);
72 addRegisterClass(MVT::funcref
, &WebAssembly::FUNCREFRegClass
);
74 // Compute derived properties from the register classes.
75 computeRegisterProperties(Subtarget
->getRegisterInfo());
77 // Transform loads and stores to pointers in address space 1 to loads and
78 // stores to WebAssembly global variables, outside linear memory.
79 for (auto T
: {MVT::i32
, MVT::i64
, MVT::f32
, MVT::f64
}) {
80 setOperationAction(ISD::LOAD
, T
, Custom
);
81 setOperationAction(ISD::STORE
, T
, Custom
);
83 if (Subtarget
->hasSIMD128()) {
84 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
, MVT::v4f32
, MVT::v2i64
,
86 setOperationAction(ISD::LOAD
, T
, Custom
);
87 setOperationAction(ISD::STORE
, T
, Custom
);
90 if (Subtarget
->hasReferenceTypes()) {
91 for (auto T
: {MVT::externref
, MVT::funcref
}) {
92 setOperationAction(ISD::LOAD
, T
, Custom
);
93 setOperationAction(ISD::STORE
, T
, Custom
);
97 setOperationAction(ISD::GlobalAddress
, MVTPtr
, Custom
);
98 setOperationAction(ISD::GlobalTLSAddress
, MVTPtr
, Custom
);
99 setOperationAction(ISD::ExternalSymbol
, MVTPtr
, Custom
);
100 setOperationAction(ISD::JumpTable
, MVTPtr
, Custom
);
101 setOperationAction(ISD::BlockAddress
, MVTPtr
, Custom
);
102 setOperationAction(ISD::BRIND
, MVT::Other
, Custom
);
104 // Take the default expansion for va_arg, va_copy, and va_end. There is no
105 // default action for va_start, so we do that custom.
106 setOperationAction(ISD::VASTART
, MVT::Other
, Custom
);
107 setOperationAction(ISD::VAARG
, MVT::Other
, Expand
);
108 setOperationAction(ISD::VACOPY
, MVT::Other
, Expand
);
109 setOperationAction(ISD::VAEND
, MVT::Other
, Expand
);
111 for (auto T
: {MVT::f32
, MVT::f64
, MVT::v4f32
, MVT::v2f64
}) {
112 // Don't expand the floating-point types to constant pools.
113 setOperationAction(ISD::ConstantFP
, T
, Legal
);
114 // Expand floating-point comparisons.
115 for (auto CC
: {ISD::SETO
, ISD::SETUO
, ISD::SETUEQ
, ISD::SETONE
,
116 ISD::SETULT
, ISD::SETULE
, ISD::SETUGT
, ISD::SETUGE
})
117 setCondCodeAction(CC
, T
, Expand
);
118 // Expand floating-point library function operators.
120 {ISD::FSIN
, ISD::FCOS
, ISD::FSINCOS
, ISD::FPOW
, ISD::FREM
, ISD::FMA
})
121 setOperationAction(Op
, T
, Expand
);
122 // Note supported floating-point library function operators that otherwise
123 // default to expand.
125 {ISD::FCEIL
, ISD::FFLOOR
, ISD::FTRUNC
, ISD::FNEARBYINT
, ISD::FRINT
})
126 setOperationAction(Op
, T
, Legal
);
127 // Support minimum and maximum, which otherwise default to expand.
128 setOperationAction(ISD::FMINIMUM
, T
, Legal
);
129 setOperationAction(ISD::FMAXIMUM
, T
, Legal
);
130 // WebAssembly currently has no builtin f16 support.
131 setOperationAction(ISD::FP16_TO_FP
, T
, Expand
);
132 setOperationAction(ISD::FP_TO_FP16
, T
, Expand
);
133 setLoadExtAction(ISD::EXTLOAD
, T
, MVT::f16
, Expand
);
134 setTruncStoreAction(T
, MVT::f16
, Expand
);
137 // Expand unavailable integer operations.
139 {ISD::BSWAP
, ISD::SMUL_LOHI
, ISD::UMUL_LOHI
, ISD::MULHS
, ISD::MULHU
,
140 ISD::SDIVREM
, ISD::UDIVREM
, ISD::SHL_PARTS
, ISD::SRA_PARTS
,
141 ISD::SRL_PARTS
, ISD::ADDC
, ISD::ADDE
, ISD::SUBC
, ISD::SUBE
}) {
142 for (auto T
: {MVT::i32
, MVT::i64
})
143 setOperationAction(Op
, T
, Expand
);
144 if (Subtarget
->hasSIMD128())
145 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
, MVT::v2i64
})
146 setOperationAction(Op
, T
, Expand
);
149 if (Subtarget
->hasNontrappingFPToInt())
150 for (auto Op
: {ISD::FP_TO_SINT_SAT
, ISD::FP_TO_UINT_SAT
})
151 for (auto T
: {MVT::i32
, MVT::i64
})
152 setOperationAction(Op
, T
, Custom
);
154 // SIMD-specific configuration
155 if (Subtarget
->hasSIMD128()) {
156 // Hoist bitcasts out of shuffles
157 setTargetDAGCombine(ISD::VECTOR_SHUFFLE
);
159 // Combine extends of extract_subvectors into widening ops
160 setTargetDAGCombine(ISD::SIGN_EXTEND
);
161 setTargetDAGCombine(ISD::ZERO_EXTEND
);
163 // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
165 setTargetDAGCombine(ISD::SINT_TO_FP
);
166 setTargetDAGCombine(ISD::UINT_TO_FP
);
167 setTargetDAGCombine(ISD::FP_EXTEND
);
168 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR
);
170 // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
171 // into conversion ops
172 setTargetDAGCombine(ISD::FP_TO_SINT_SAT
);
173 setTargetDAGCombine(ISD::FP_TO_UINT_SAT
);
174 setTargetDAGCombine(ISD::FP_ROUND
);
175 setTargetDAGCombine(ISD::CONCAT_VECTORS
);
177 // Support saturating add for i8x16 and i16x8
178 for (auto Op
: {ISD::SADDSAT
, ISD::UADDSAT
})
179 for (auto T
: {MVT::v16i8
, MVT::v8i16
})
180 setOperationAction(Op
, T
, Legal
);
182 // Support integer abs
183 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
, MVT::v2i64
})
184 setOperationAction(ISD::ABS
, T
, Legal
);
186 // Custom lower BUILD_VECTORs to minimize number of replace_lanes
187 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
, MVT::v4f32
, MVT::v2i64
,
189 setOperationAction(ISD::BUILD_VECTOR
, T
, Custom
);
191 // We have custom shuffle lowering to expose the shuffle mask
192 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
, MVT::v4f32
, MVT::v2i64
,
194 setOperationAction(ISD::VECTOR_SHUFFLE
, T
, Custom
);
196 // Custom lowering since wasm shifts must have a scalar shift amount
197 for (auto Op
: {ISD::SHL
, ISD::SRA
, ISD::SRL
})
198 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
, MVT::v2i64
})
199 setOperationAction(Op
, T
, Custom
);
201 // Custom lower lane accesses to expand out variable indices
202 for (auto Op
: {ISD::EXTRACT_VECTOR_ELT
, ISD::INSERT_VECTOR_ELT
})
203 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
, MVT::v4f32
, MVT::v2i64
,
205 setOperationAction(Op
, T
, Custom
);
207 // There is no i8x16.mul instruction
208 setOperationAction(ISD::MUL
, MVT::v16i8
, Expand
);
210 // There is no vector conditional select instruction
211 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
, MVT::v4f32
, MVT::v2i64
,
213 setOperationAction(ISD::SELECT_CC
, T
, Expand
);
215 // Expand integer operations supported for scalars but not SIMD
216 for (auto Op
: {ISD::CTLZ
, ISD::CTTZ
, ISD::CTPOP
, ISD::SDIV
, ISD::UDIV
,
217 ISD::SREM
, ISD::UREM
, ISD::ROTL
, ISD::ROTR
})
218 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
, MVT::v2i64
})
219 setOperationAction(Op
, T
, Expand
);
221 // But we do have integer min and max operations
222 for (auto Op
: {ISD::SMIN
, ISD::SMAX
, ISD::UMIN
, ISD::UMAX
})
223 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
})
224 setOperationAction(Op
, T
, Legal
);
226 // And we have popcnt for i8x16
227 setOperationAction(ISD::CTPOP
, MVT::v16i8
, Legal
);
229 // Expand float operations supported for scalars but not SIMD
230 for (auto Op
: {ISD::FCOPYSIGN
, ISD::FLOG
, ISD::FLOG2
, ISD::FLOG10
,
231 ISD::FEXP
, ISD::FEXP2
, ISD::FRINT
})
232 for (auto T
: {MVT::v4f32
, MVT::v2f64
})
233 setOperationAction(Op
, T
, Expand
);
235 // Unsigned comparison operations are unavailable for i64x2 vectors.
236 for (auto CC
: {ISD::SETUGT
, ISD::SETUGE
, ISD::SETULT
, ISD::SETULE
})
237 setCondCodeAction(CC
, MVT::v2i64
, Custom
);
239 // 64x2 conversions are not in the spec
241 {ISD::SINT_TO_FP
, ISD::UINT_TO_FP
, ISD::FP_TO_SINT
, ISD::FP_TO_UINT
})
242 for (auto T
: {MVT::v2i64
, MVT::v2f64
})
243 setOperationAction(Op
, T
, Expand
);
245 // But saturating fp_to_int converstions are
246 for (auto Op
: {ISD::FP_TO_SINT_SAT
, ISD::FP_TO_UINT_SAT
})
247 setOperationAction(Op
, MVT::v4i32
, Custom
);
250 // As a special case, these operators use the type to mean the type to
252 setOperationAction(ISD::SIGN_EXTEND_INREG
, MVT::i1
, Expand
);
253 if (!Subtarget
->hasSignExt()) {
254 // Sign extends are legal only when extending a vector extract
255 auto Action
= Subtarget
->hasSIMD128() ? Custom
: Expand
;
256 for (auto T
: {MVT::i8
, MVT::i16
, MVT::i32
})
257 setOperationAction(ISD::SIGN_EXTEND_INREG
, T
, Action
);
259 for (auto T
: MVT::integer_fixedlen_vector_valuetypes())
260 setOperationAction(ISD::SIGN_EXTEND_INREG
, T
, Expand
);
262 // Dynamic stack allocation: use the default expansion.
263 setOperationAction(ISD::STACKSAVE
, MVT::Other
, Expand
);
264 setOperationAction(ISD::STACKRESTORE
, MVT::Other
, Expand
);
265 setOperationAction(ISD::DYNAMIC_STACKALLOC
, MVTPtr
, Expand
);
267 setOperationAction(ISD::FrameIndex
, MVT::i32
, Custom
);
268 setOperationAction(ISD::FrameIndex
, MVT::i64
, Custom
);
269 setOperationAction(ISD::CopyToReg
, MVT::Other
, Custom
);
271 // Expand these forms; we pattern-match the forms that we can handle in isel.
272 for (auto T
: {MVT::i32
, MVT::i64
, MVT::f32
, MVT::f64
})
273 for (auto Op
: {ISD::BR_CC
, ISD::SELECT_CC
})
274 setOperationAction(Op
, T
, Expand
);
276 // We have custom switch handling.
277 setOperationAction(ISD::BR_JT
, MVT::Other
, Custom
);
279 // WebAssembly doesn't have:
280 // - Floating-point extending loads.
281 // - Floating-point truncating stores.
282 // - i1 extending loads.
283 // - truncating SIMD stores and most extending loads
284 setLoadExtAction(ISD::EXTLOAD
, MVT::f64
, MVT::f32
, Expand
);
285 setTruncStoreAction(MVT::f64
, MVT::f32
, Expand
);
286 for (auto T
: MVT::integer_valuetypes())
287 for (auto Ext
: {ISD::EXTLOAD
, ISD::ZEXTLOAD
, ISD::SEXTLOAD
})
288 setLoadExtAction(Ext
, T
, MVT::i1
, Promote
);
289 if (Subtarget
->hasSIMD128()) {
290 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
, MVT::v2i64
, MVT::v4f32
,
292 for (auto MemT
: MVT::fixedlen_vector_valuetypes()) {
293 if (MVT(T
) != MemT
) {
294 setTruncStoreAction(T
, MemT
, Expand
);
295 for (auto Ext
: {ISD::EXTLOAD
, ISD::ZEXTLOAD
, ISD::SEXTLOAD
})
296 setLoadExtAction(Ext
, T
, MemT
, Expand
);
300 // But some vector extending loads are legal
301 for (auto Ext
: {ISD::EXTLOAD
, ISD::SEXTLOAD
, ISD::ZEXTLOAD
}) {
302 setLoadExtAction(Ext
, MVT::v8i16
, MVT::v8i8
, Legal
);
303 setLoadExtAction(Ext
, MVT::v4i32
, MVT::v4i16
, Legal
);
304 setLoadExtAction(Ext
, MVT::v2i64
, MVT::v2i32
, Legal
);
308 // Don't do anything clever with build_pairs
309 setOperationAction(ISD::BUILD_PAIR
, MVT::i64
, Expand
);
311 // Trap lowers to wasm unreachable
312 setOperationAction(ISD::TRAP
, MVT::Other
, Legal
);
313 setOperationAction(ISD::DEBUGTRAP
, MVT::Other
, Legal
);
315 // Exception handling intrinsics
316 setOperationAction(ISD::INTRINSIC_WO_CHAIN
, MVT::Other
, Custom
);
317 setOperationAction(ISD::INTRINSIC_W_CHAIN
, MVT::Other
, Custom
);
318 setOperationAction(ISD::INTRINSIC_VOID
, MVT::Other
, Custom
);
320 setMaxAtomicSizeInBitsSupported(64);
322 // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
323 // consistent with the f64 and f128 names.
324 setLibcallName(RTLIB::FPEXT_F16_F32
, "__extendhfsf2");
325 setLibcallName(RTLIB::FPROUND_F32_F16
, "__truncsfhf2");
327 // Define the emscripten name for return address helper.
328 // TODO: when implementing other Wasm backends, make this generic or only do
329 // this on emscripten depending on what they end up doing.
330 setLibcallName(RTLIB::RETURN_ADDRESS
, "emscripten_return_address");
332 // Always convert switches to br_tables unless there is only one case, which
333 // is equivalent to a simple branch. This reduces code size for wasm, and we
334 // defer possible jump table optimizations to the VM.
335 setMinimumJumpTableEntries(2);
338 TargetLowering::AtomicExpansionKind
339 WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst
*AI
) const {
340 // We have wasm instructions for these
341 switch (AI
->getOperation()) {
342 case AtomicRMWInst::Add
:
343 case AtomicRMWInst::Sub
:
344 case AtomicRMWInst::And
:
345 case AtomicRMWInst::Or
:
346 case AtomicRMWInst::Xor
:
347 case AtomicRMWInst::Xchg
:
348 return AtomicExpansionKind::None
;
352 return AtomicExpansionKind::CmpXChg
;
355 bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp
) const {
356 // Implementation copied from X86TargetLowering.
357 unsigned Opc
= VecOp
.getOpcode();
359 // Assume target opcodes can't be scalarized.
360 // TODO - do we have any exceptions?
361 if (Opc
>= ISD::BUILTIN_OP_END
)
364 // If the vector op is not supported, try to convert to scalar.
365 EVT VecVT
= VecOp
.getValueType();
366 if (!isOperationLegalOrCustomOrPromote(Opc
, VecVT
))
369 // If the vector op is supported, but the scalar op is not, the transform may
370 // not be worthwhile.
371 EVT ScalarVT
= VecVT
.getScalarType();
372 return isOperationLegalOrCustomOrPromote(Opc
, ScalarVT
);
375 FastISel
*WebAssemblyTargetLowering::createFastISel(
376 FunctionLoweringInfo
&FuncInfo
, const TargetLibraryInfo
*LibInfo
) const {
377 return WebAssembly::createFastISel(FuncInfo
, LibInfo
);
380 MVT
WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout
& /*DL*/,
382 unsigned BitWidth
= NextPowerOf2(VT
.getSizeInBits() - 1);
383 if (BitWidth
> 1 && BitWidth
< 8)
387 // The shift will be lowered to a libcall, and compiler-rt libcalls expect
388 // the count to be an i32.
390 assert(BitWidth
>= Log2_32_Ceil(VT
.getSizeInBits()) &&
391 "32-bit shift counts ought to be enough for anyone");
394 MVT Result
= MVT::getIntegerVT(BitWidth
);
395 assert(Result
!= MVT::INVALID_SIMPLE_VALUE_TYPE
&&
396 "Unable to represent scalar shift amount type");
400 // Lower an fp-to-int conversion operator from the LLVM opcode, which has an
401 // undefined result on invalid/overflow, to the WebAssembly opcode, which
402 // traps on invalid/overflow.
403 static MachineBasicBlock
*LowerFPToInt(MachineInstr
&MI
, DebugLoc DL
,
404 MachineBasicBlock
*BB
,
405 const TargetInstrInfo
&TII
,
406 bool IsUnsigned
, bool Int64
,
407 bool Float64
, unsigned LoweredOpcode
) {
408 MachineRegisterInfo
&MRI
= BB
->getParent()->getRegInfo();
410 Register OutReg
= MI
.getOperand(0).getReg();
411 Register InReg
= MI
.getOperand(1).getReg();
413 unsigned Abs
= Float64
? WebAssembly::ABS_F64
: WebAssembly::ABS_F32
;
414 unsigned FConst
= Float64
? WebAssembly::CONST_F64
: WebAssembly::CONST_F32
;
415 unsigned LT
= Float64
? WebAssembly::LT_F64
: WebAssembly::LT_F32
;
416 unsigned GE
= Float64
? WebAssembly::GE_F64
: WebAssembly::GE_F32
;
417 unsigned IConst
= Int64
? WebAssembly::CONST_I64
: WebAssembly::CONST_I32
;
418 unsigned Eqz
= WebAssembly::EQZ_I32
;
419 unsigned And
= WebAssembly::AND_I32
;
420 int64_t Limit
= Int64
? INT64_MIN
: INT32_MIN
;
421 int64_t Substitute
= IsUnsigned
? 0 : Limit
;
422 double CmpVal
= IsUnsigned
? -(double)Limit
* 2.0 : -(double)Limit
;
423 auto &Context
= BB
->getParent()->getFunction().getContext();
424 Type
*Ty
= Float64
? Type::getDoubleTy(Context
) : Type::getFloatTy(Context
);
426 const BasicBlock
*LLVMBB
= BB
->getBasicBlock();
427 MachineFunction
*F
= BB
->getParent();
428 MachineBasicBlock
*TrueMBB
= F
->CreateMachineBasicBlock(LLVMBB
);
429 MachineBasicBlock
*FalseMBB
= F
->CreateMachineBasicBlock(LLVMBB
);
430 MachineBasicBlock
*DoneMBB
= F
->CreateMachineBasicBlock(LLVMBB
);
432 MachineFunction::iterator It
= ++BB
->getIterator();
433 F
->insert(It
, FalseMBB
);
434 F
->insert(It
, TrueMBB
);
435 F
->insert(It
, DoneMBB
);
437 // Transfer the remainder of BB and its successor edges to DoneMBB.
438 DoneMBB
->splice(DoneMBB
->begin(), BB
, std::next(MI
.getIterator()), BB
->end());
439 DoneMBB
->transferSuccessorsAndUpdatePHIs(BB
);
441 BB
->addSuccessor(TrueMBB
);
442 BB
->addSuccessor(FalseMBB
);
443 TrueMBB
->addSuccessor(DoneMBB
);
444 FalseMBB
->addSuccessor(DoneMBB
);
446 unsigned Tmp0
, Tmp1
, CmpReg
, EqzReg
, FalseReg
, TrueReg
;
447 Tmp0
= MRI
.createVirtualRegister(MRI
.getRegClass(InReg
));
448 Tmp1
= MRI
.createVirtualRegister(MRI
.getRegClass(InReg
));
449 CmpReg
= MRI
.createVirtualRegister(&WebAssembly::I32RegClass
);
450 EqzReg
= MRI
.createVirtualRegister(&WebAssembly::I32RegClass
);
451 FalseReg
= MRI
.createVirtualRegister(MRI
.getRegClass(OutReg
));
452 TrueReg
= MRI
.createVirtualRegister(MRI
.getRegClass(OutReg
));
454 MI
.eraseFromParent();
455 // For signed numbers, we can do a single comparison to determine whether
456 // fabs(x) is within range.
460 BuildMI(BB
, DL
, TII
.get(Abs
), Tmp0
).addReg(InReg
);
462 BuildMI(BB
, DL
, TII
.get(FConst
), Tmp1
)
463 .addFPImm(cast
<ConstantFP
>(ConstantFP::get(Ty
, CmpVal
)));
464 BuildMI(BB
, DL
, TII
.get(LT
), CmpReg
).addReg(Tmp0
).addReg(Tmp1
);
466 // For unsigned numbers, we have to do a separate comparison with zero.
468 Tmp1
= MRI
.createVirtualRegister(MRI
.getRegClass(InReg
));
469 Register SecondCmpReg
=
470 MRI
.createVirtualRegister(&WebAssembly::I32RegClass
);
471 Register AndReg
= MRI
.createVirtualRegister(&WebAssembly::I32RegClass
);
472 BuildMI(BB
, DL
, TII
.get(FConst
), Tmp1
)
473 .addFPImm(cast
<ConstantFP
>(ConstantFP::get(Ty
, 0.0)));
474 BuildMI(BB
, DL
, TII
.get(GE
), SecondCmpReg
).addReg(Tmp0
).addReg(Tmp1
);
475 BuildMI(BB
, DL
, TII
.get(And
), AndReg
).addReg(CmpReg
).addReg(SecondCmpReg
);
479 BuildMI(BB
, DL
, TII
.get(Eqz
), EqzReg
).addReg(CmpReg
);
481 // Create the CFG diamond to select between doing the conversion or using
482 // the substitute value.
483 BuildMI(BB
, DL
, TII
.get(WebAssembly::BR_IF
)).addMBB(TrueMBB
).addReg(EqzReg
);
484 BuildMI(FalseMBB
, DL
, TII
.get(LoweredOpcode
), FalseReg
).addReg(InReg
);
485 BuildMI(FalseMBB
, DL
, TII
.get(WebAssembly::BR
)).addMBB(DoneMBB
);
486 BuildMI(TrueMBB
, DL
, TII
.get(IConst
), TrueReg
).addImm(Substitute
);
487 BuildMI(*DoneMBB
, DoneMBB
->begin(), DL
, TII
.get(TargetOpcode::PHI
), OutReg
)
496 static MachineBasicBlock
*
497 LowerCallResults(MachineInstr
&CallResults
, DebugLoc DL
, MachineBasicBlock
*BB
,
498 const WebAssemblySubtarget
*Subtarget
,
499 const TargetInstrInfo
&TII
) {
500 MachineInstr
&CallParams
= *CallResults
.getPrevNode();
501 assert(CallParams
.getOpcode() == WebAssembly::CALL_PARAMS
);
502 assert(CallResults
.getOpcode() == WebAssembly::CALL_RESULTS
||
503 CallResults
.getOpcode() == WebAssembly::RET_CALL_RESULTS
);
505 bool IsIndirect
= CallParams
.getOperand(0).isReg();
506 bool IsRetCall
= CallResults
.getOpcode() == WebAssembly::RET_CALL_RESULTS
;
508 bool IsFuncrefCall
= false;
510 Register Reg
= CallParams
.getOperand(0).getReg();
511 const MachineFunction
*MF
= BB
->getParent();
512 const MachineRegisterInfo
&MRI
= MF
->getRegInfo();
513 const TargetRegisterClass
*TRC
= MRI
.getRegClass(Reg
);
514 IsFuncrefCall
= (TRC
== &WebAssembly::FUNCREFRegClass
);
515 assert(!IsFuncrefCall
|| Subtarget
->hasReferenceTypes());
519 if (IsIndirect
&& IsRetCall
) {
520 CallOp
= WebAssembly::RET_CALL_INDIRECT
;
521 } else if (IsIndirect
) {
522 CallOp
= WebAssembly::CALL_INDIRECT
;
523 } else if (IsRetCall
) {
524 CallOp
= WebAssembly::RET_CALL
;
526 CallOp
= WebAssembly::CALL
;
529 MachineFunction
&MF
= *BB
->getParent();
530 const MCInstrDesc
&MCID
= TII
.get(CallOp
);
531 MachineInstrBuilder
MIB(MF
, MF
.CreateMachineInstr(MCID
, DL
));
533 // See if we must truncate the function pointer.
534 // CALL_INDIRECT takes an i32, but in wasm64 we represent function pointers
535 // as 64-bit for uniformity with other pointer types.
536 // See also: WebAssemblyFastISel::selectCall
537 if (IsIndirect
&& MF
.getSubtarget
<WebAssemblySubtarget
>().hasAddr64()) {
539 MF
.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass
);
540 auto &FnPtr
= CallParams
.getOperand(0);
541 BuildMI(*BB
, CallResults
.getIterator(), DL
,
542 TII
.get(WebAssembly::I32_WRAP_I64
), Reg32
)
543 .addReg(FnPtr
.getReg());
547 // Move the function pointer to the end of the arguments for indirect calls
549 auto FnPtr
= CallParams
.getOperand(0);
550 CallParams
.RemoveOperand(0);
551 CallParams
.addOperand(FnPtr
);
554 for (auto Def
: CallResults
.defs())
558 // Placeholder for the type index.
560 // The table into which this call_indirect indexes.
561 MCSymbolWasm
*Table
= IsFuncrefCall
562 ? WebAssembly::getOrCreateFuncrefCallTableSymbol(
563 MF
.getContext(), Subtarget
)
564 : WebAssembly::getOrCreateFunctionTableSymbol(
565 MF
.getContext(), Subtarget
);
566 if (Subtarget
->hasReferenceTypes()) {
569 // For the MVP there is at most one table whose number is 0, but we can't
570 // write a table symbol or issue relocations. Instead we just ensure the
571 // table is live and write a zero.
577 for (auto Use
: CallParams
.uses())
580 BB
->insert(CallResults
.getIterator(), MIB
);
581 CallParams
.eraseFromParent();
582 CallResults
.eraseFromParent();
584 // If this is a funcref call, to avoid hidden GC roots, we need to clear the
585 // table slot with ref.null upon call_indirect return.
587 // This generates the following code, which comes right after a call_indirect
592 // table.set __funcref_call_table
593 if (IsIndirect
&& IsFuncrefCall
) {
594 MCSymbolWasm
*Table
= WebAssembly::getOrCreateFuncrefCallTableSymbol(
595 MF
.getContext(), Subtarget
);
597 MF
.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass
);
598 MachineInstr
*Const0
=
599 BuildMI(MF
, DL
, TII
.get(WebAssembly::CONST_I32
), RegZero
).addImm(0);
600 BB
->insertAfter(MIB
.getInstr()->getIterator(), Const0
);
602 Register RegFuncref
=
603 MF
.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass
);
604 MachineInstr
*RefNull
=
605 BuildMI(MF
, DL
, TII
.get(WebAssembly::REF_NULL_FUNCREF
), RegFuncref
)
606 .addImm(static_cast<int32_t>(WebAssembly::HeapType::Funcref
));
607 BB
->insertAfter(Const0
->getIterator(), RefNull
);
609 MachineInstr
*TableSet
=
610 BuildMI(MF
, DL
, TII
.get(WebAssembly::TABLE_SET_FUNCREF
))
614 BB
->insertAfter(RefNull
->getIterator(), TableSet
);
620 MachineBasicBlock
*WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
621 MachineInstr
&MI
, MachineBasicBlock
*BB
) const {
622 const TargetInstrInfo
&TII
= *Subtarget
->getInstrInfo();
623 DebugLoc DL
= MI
.getDebugLoc();
625 switch (MI
.getOpcode()) {
627 llvm_unreachable("Unexpected instr type to insert");
628 case WebAssembly::FP_TO_SINT_I32_F32
:
629 return LowerFPToInt(MI
, DL
, BB
, TII
, false, false, false,
630 WebAssembly::I32_TRUNC_S_F32
);
631 case WebAssembly::FP_TO_UINT_I32_F32
:
632 return LowerFPToInt(MI
, DL
, BB
, TII
, true, false, false,
633 WebAssembly::I32_TRUNC_U_F32
);
634 case WebAssembly::FP_TO_SINT_I64_F32
:
635 return LowerFPToInt(MI
, DL
, BB
, TII
, false, true, false,
636 WebAssembly::I64_TRUNC_S_F32
);
637 case WebAssembly::FP_TO_UINT_I64_F32
:
638 return LowerFPToInt(MI
, DL
, BB
, TII
, true, true, false,
639 WebAssembly::I64_TRUNC_U_F32
);
640 case WebAssembly::FP_TO_SINT_I32_F64
:
641 return LowerFPToInt(MI
, DL
, BB
, TII
, false, false, true,
642 WebAssembly::I32_TRUNC_S_F64
);
643 case WebAssembly::FP_TO_UINT_I32_F64
:
644 return LowerFPToInt(MI
, DL
, BB
, TII
, true, false, true,
645 WebAssembly::I32_TRUNC_U_F64
);
646 case WebAssembly::FP_TO_SINT_I64_F64
:
647 return LowerFPToInt(MI
, DL
, BB
, TII
, false, true, true,
648 WebAssembly::I64_TRUNC_S_F64
);
649 case WebAssembly::FP_TO_UINT_I64_F64
:
650 return LowerFPToInt(MI
, DL
, BB
, TII
, true, true, true,
651 WebAssembly::I64_TRUNC_U_F64
);
652 case WebAssembly::CALL_RESULTS
:
653 case WebAssembly::RET_CALL_RESULTS
:
654 return LowerCallResults(MI
, DL
, BB
, Subtarget
, TII
);
659 WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode
) const {
660 switch (static_cast<WebAssemblyISD::NodeType
>(Opcode
)) {
661 case WebAssemblyISD::FIRST_NUMBER
:
662 case WebAssemblyISD::FIRST_MEM_OPCODE
:
664 #define HANDLE_NODETYPE(NODE) \
665 case WebAssemblyISD::NODE: \
666 return "WebAssemblyISD::" #NODE;
667 #define HANDLE_MEM_NODETYPE(NODE) HANDLE_NODETYPE(NODE)
668 #include "WebAssemblyISD.def"
669 #undef HANDLE_MEM_NODETYPE
670 #undef HANDLE_NODETYPE
675 std::pair
<unsigned, const TargetRegisterClass
*>
676 WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
677 const TargetRegisterInfo
*TRI
, StringRef Constraint
, MVT VT
) const {
678 // First, see if this is a constraint that directly corresponds to a
679 // WebAssembly register class.
680 if (Constraint
.size() == 1) {
681 switch (Constraint
[0]) {
683 assert(VT
!= MVT::iPTR
&& "Pointer MVT not expected here");
684 if (Subtarget
->hasSIMD128() && VT
.isVector()) {
685 if (VT
.getSizeInBits() == 128)
686 return std::make_pair(0U, &WebAssembly::V128RegClass
);
688 if (VT
.isInteger() && !VT
.isVector()) {
689 if (VT
.getSizeInBits() <= 32)
690 return std::make_pair(0U, &WebAssembly::I32RegClass
);
691 if (VT
.getSizeInBits() <= 64)
692 return std::make_pair(0U, &WebAssembly::I64RegClass
);
694 if (VT
.isFloatingPoint() && !VT
.isVector()) {
695 switch (VT
.getSizeInBits()) {
697 return std::make_pair(0U, &WebAssembly::F32RegClass
);
699 return std::make_pair(0U, &WebAssembly::F64RegClass
);
710 return TargetLowering::getRegForInlineAsmConstraint(TRI
, Constraint
, VT
);
713 bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
714 // Assume ctz is a relatively cheap operation.
718 bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
719 // Assume clz is a relatively cheap operation.
723 bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout
&DL
,
725 Type
*Ty
, unsigned AS
,
726 Instruction
*I
) const {
727 // WebAssembly offsets are added as unsigned without wrapping. The
728 // isLegalAddressingMode gives us no way to determine if wrapping could be
729 // happening, so we approximate this by accepting only non-negative offsets.
733 // WebAssembly has no scale register operands.
737 // Everything else is legal.
741 bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
742 EVT
/*VT*/, unsigned /*AddrSpace*/, Align
/*Align*/,
743 MachineMemOperand::Flags
/*Flags*/, bool *Fast
) const {
744 // WebAssembly supports unaligned accesses, though it should be declared
745 // with the p2align attribute on loads and stores which do so, and there
746 // may be a performance impact. We tell LLVM they're "fast" because
747 // for the kinds of things that LLVM uses this for (merging adjacent stores
748 // of constants, etc.), WebAssembly implementations will either want the
749 // unaligned access or they'll split anyway.
755 bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT
,
756 AttributeList Attr
) const {
757 // The current thinking is that wasm engines will perform this optimization,
758 // so we can save on code size.
762 bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal
) const {
763 EVT ExtT
= ExtVal
.getValueType();
764 EVT MemT
= cast
<LoadSDNode
>(ExtVal
->getOperand(0))->getValueType(0);
765 return (ExtT
== MVT::v8i16
&& MemT
== MVT::v8i8
) ||
766 (ExtT
== MVT::v4i32
&& MemT
== MVT::v4i16
) ||
767 (ExtT
== MVT::v2i64
&& MemT
== MVT::v2i32
);
770 bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
771 const GlobalAddressSDNode
*GA
) const {
772 // Wasm doesn't support function addresses with offsets
773 const GlobalValue
*GV
= GA
->getGlobal();
774 return isa
<Function
>(GV
) ? false : TargetLowering::isOffsetFoldingLegal(GA
);
777 EVT
WebAssemblyTargetLowering::getSetCCResultType(const DataLayout
&DL
,
781 return VT
.changeVectorElementTypeToInteger();
783 // So far, all branch instructions in Wasm take an I32 condition.
784 // The default TargetLowering::getSetCCResultType returns the pointer size,
785 // which would be useful to reduce instruction counts when testing
786 // against 64-bit pointers/values if at some point Wasm supports that.
787 return EVT::getIntegerVT(C
, 32);
790 bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo
&Info
,
793 unsigned Intrinsic
) const {
795 case Intrinsic::wasm_memory_atomic_notify
:
796 Info
.opc
= ISD::INTRINSIC_W_CHAIN
;
797 Info
.memVT
= MVT::i32
;
798 Info
.ptrVal
= I
.getArgOperand(0);
800 Info
.align
= Align(4);
801 // atomic.notify instruction does not really load the memory specified with
802 // this argument, but MachineMemOperand should either be load or store, so
803 // we set this to a load.
804 // FIXME Volatile isn't really correct, but currently all LLVM atomic
805 // instructions are treated as volatiles in the backend, so we should be
806 // consistent. The same applies for wasm_atomic_wait intrinsics too.
807 Info
.flags
= MachineMemOperand::MOVolatile
| MachineMemOperand::MOLoad
;
809 case Intrinsic::wasm_memory_atomic_wait32
:
810 Info
.opc
= ISD::INTRINSIC_W_CHAIN
;
811 Info
.memVT
= MVT::i32
;
812 Info
.ptrVal
= I
.getArgOperand(0);
814 Info
.align
= Align(4);
815 Info
.flags
= MachineMemOperand::MOVolatile
| MachineMemOperand::MOLoad
;
817 case Intrinsic::wasm_memory_atomic_wait64
:
818 Info
.opc
= ISD::INTRINSIC_W_CHAIN
;
819 Info
.memVT
= MVT::i64
;
820 Info
.ptrVal
= I
.getArgOperand(0);
822 Info
.align
= Align(8);
823 Info
.flags
= MachineMemOperand::MOVolatile
| MachineMemOperand::MOLoad
;
830 void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
831 const SDValue Op
, KnownBits
&Known
, const APInt
&DemandedElts
,
832 const SelectionDAG
&DAG
, unsigned Depth
) const {
833 switch (Op
.getOpcode()) {
836 case ISD::INTRINSIC_WO_CHAIN
: {
837 unsigned IntNo
= Op
.getConstantOperandVal(0);
841 case Intrinsic::wasm_bitmask
: {
842 unsigned BitWidth
= Known
.getBitWidth();
843 EVT VT
= Op
.getOperand(1).getSimpleValueType();
844 unsigned PossibleBits
= VT
.getVectorNumElements();
845 APInt ZeroMask
= APInt::getHighBitsSet(BitWidth
, BitWidth
- PossibleBits
);
846 Known
.Zero
|= ZeroMask
;
854 TargetLoweringBase::LegalizeTypeAction
855 WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT
) const {
856 if (VT
.isFixedLengthVector()) {
857 MVT EltVT
= VT
.getVectorElementType();
858 // We have legal vector types with these lane types, so widening the
859 // vector would let us use some of the lanes directly without having to
860 // extend or truncate values.
861 if (EltVT
== MVT::i8
|| EltVT
== MVT::i16
|| EltVT
== MVT::i32
||
862 EltVT
== MVT::i64
|| EltVT
== MVT::f32
|| EltVT
== MVT::f64
)
863 return TypeWidenVector
;
866 return TargetLoweringBase::getPreferredVectorAction(VT
);
869 //===----------------------------------------------------------------------===//
870 // WebAssembly Lowering private implementation.
871 //===----------------------------------------------------------------------===//
873 //===----------------------------------------------------------------------===//
875 //===----------------------------------------------------------------------===//
877 static void fail(const SDLoc
&DL
, SelectionDAG
&DAG
, const char *Msg
) {
878 MachineFunction
&MF
= DAG
.getMachineFunction();
879 DAG
.getContext()->diagnose(
880 DiagnosticInfoUnsupported(MF
.getFunction(), Msg
, DL
.getDebugLoc()));
883 // Test whether the given calling convention is supported.
884 static bool callingConvSupported(CallingConv::ID CallConv
) {
885 // We currently support the language-independent target-independent
886 // conventions. We don't yet have a way to annotate calls with properties like
887 // "cold", and we don't have any call-clobbered registers, so these are mostly
888 // all handled the same.
889 return CallConv
== CallingConv::C
|| CallConv
== CallingConv::Fast
||
890 CallConv
== CallingConv::Cold
||
891 CallConv
== CallingConv::PreserveMost
||
892 CallConv
== CallingConv::PreserveAll
||
893 CallConv
== CallingConv::CXX_FAST_TLS
||
894 CallConv
== CallingConv::WASM_EmscriptenInvoke
||
895 CallConv
== CallingConv::Swift
;
899 WebAssemblyTargetLowering::LowerCall(CallLoweringInfo
&CLI
,
900 SmallVectorImpl
<SDValue
> &InVals
) const {
901 SelectionDAG
&DAG
= CLI
.DAG
;
903 SDValue Chain
= CLI
.Chain
;
904 SDValue Callee
= CLI
.Callee
;
905 MachineFunction
&MF
= DAG
.getMachineFunction();
906 auto Layout
= MF
.getDataLayout();
908 CallingConv::ID CallConv
= CLI
.CallConv
;
909 if (!callingConvSupported(CallConv
))
911 "WebAssembly doesn't support language-specific or target-specific "
912 "calling conventions yet");
913 if (CLI
.IsPatchPoint
)
914 fail(DL
, DAG
, "WebAssembly doesn't support patch point yet");
916 if (CLI
.IsTailCall
) {
917 auto NoTail
= [&](const char *Msg
) {
918 if (CLI
.CB
&& CLI
.CB
->isMustTailCall())
920 CLI
.IsTailCall
= false;
923 if (!Subtarget
->hasTailCall())
924 NoTail("WebAssembly 'tail-call' feature not enabled");
926 // Varargs calls cannot be tail calls because the buffer is on the stack
928 NoTail("WebAssembly does not support varargs tail calls");
930 // Do not tail call unless caller and callee return types match
931 const Function
&F
= MF
.getFunction();
932 const TargetMachine
&TM
= getTargetMachine();
933 Type
*RetTy
= F
.getReturnType();
934 SmallVector
<MVT
, 4> CallerRetTys
;
935 SmallVector
<MVT
, 4> CalleeRetTys
;
936 computeLegalValueVTs(F
, TM
, RetTy
, CallerRetTys
);
937 computeLegalValueVTs(F
, TM
, CLI
.RetTy
, CalleeRetTys
);
938 bool TypesMatch
= CallerRetTys
.size() == CalleeRetTys
.size() &&
939 std::equal(CallerRetTys
.begin(), CallerRetTys
.end(),
940 CalleeRetTys
.begin());
942 NoTail("WebAssembly tail call requires caller and callee return types to "
945 // If pointers to local stack values are passed, we cannot tail call
947 for (auto &Arg
: CLI
.CB
->args()) {
948 Value
*Val
= Arg
.get();
949 // Trace the value back through pointer operations
951 Value
*Src
= Val
->stripPointerCastsAndAliases();
952 if (auto *GEP
= dyn_cast
<GetElementPtrInst
>(Src
))
953 Src
= GEP
->getPointerOperand();
958 if (isa
<AllocaInst
>(Val
)) {
960 "WebAssembly does not support tail calling with stack arguments");
967 SmallVectorImpl
<ISD::InputArg
> &Ins
= CLI
.Ins
;
968 SmallVectorImpl
<ISD::OutputArg
> &Outs
= CLI
.Outs
;
969 SmallVectorImpl
<SDValue
> &OutVals
= CLI
.OutVals
;
971 // The generic code may have added an sret argument. If we're lowering an
972 // invoke function, the ABI requires that the function pointer be the first
973 // argument, so we may have to swap the arguments.
974 if (CallConv
== CallingConv::WASM_EmscriptenInvoke
&& Outs
.size() >= 2 &&
975 Outs
[0].Flags
.isSRet()) {
976 std::swap(Outs
[0], Outs
[1]);
977 std::swap(OutVals
[0], OutVals
[1]);
980 bool HasSwiftSelfArg
= false;
981 bool HasSwiftErrorArg
= false;
982 unsigned NumFixedArgs
= 0;
983 for (unsigned I
= 0; I
< Outs
.size(); ++I
) {
984 const ISD::OutputArg
&Out
= Outs
[I
];
985 SDValue
&OutVal
= OutVals
[I
];
986 HasSwiftSelfArg
|= Out
.Flags
.isSwiftSelf();
987 HasSwiftErrorArg
|= Out
.Flags
.isSwiftError();
988 if (Out
.Flags
.isNest())
989 fail(DL
, DAG
, "WebAssembly hasn't implemented nest arguments");
990 if (Out
.Flags
.isInAlloca())
991 fail(DL
, DAG
, "WebAssembly hasn't implemented inalloca arguments");
992 if (Out
.Flags
.isInConsecutiveRegs())
993 fail(DL
, DAG
, "WebAssembly hasn't implemented cons regs arguments");
994 if (Out
.Flags
.isInConsecutiveRegsLast())
995 fail(DL
, DAG
, "WebAssembly hasn't implemented cons regs last arguments");
996 if (Out
.Flags
.isByVal() && Out
.Flags
.getByValSize() != 0) {
997 auto &MFI
= MF
.getFrameInfo();
998 int FI
= MFI
.CreateStackObject(Out
.Flags
.getByValSize(),
999 Out
.Flags
.getNonZeroByValAlign(),
1002 DAG
.getConstant(Out
.Flags
.getByValSize(), DL
, MVT::i32
);
1003 SDValue FINode
= DAG
.getFrameIndex(FI
, getPointerTy(Layout
));
1004 Chain
= DAG
.getMemcpy(
1005 Chain
, DL
, FINode
, OutVal
, SizeNode
, Out
.Flags
.getNonZeroByValAlign(),
1006 /*isVolatile*/ false, /*AlwaysInline=*/false,
1007 /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
1010 // Count the number of fixed args *after* legalization.
1011 NumFixedArgs
+= Out
.IsFixed
;
1014 bool IsVarArg
= CLI
.IsVarArg
;
1015 auto PtrVT
= getPointerTy(Layout
);
1017 // For swiftcc, emit additional swiftself and swifterror arguments
1018 // if there aren't. These additional arguments are also added for callee
1019 // signature They are necessary to match callee and caller signature for
1021 if (CallConv
== CallingConv::Swift
) {
1022 if (!HasSwiftSelfArg
) {
1025 Arg
.Flags
.setSwiftSelf();
1026 CLI
.Outs
.push_back(Arg
);
1027 SDValue ArgVal
= DAG
.getUNDEF(PtrVT
);
1028 CLI
.OutVals
.push_back(ArgVal
);
1030 if (!HasSwiftErrorArg
) {
1033 Arg
.Flags
.setSwiftError();
1034 CLI
.Outs
.push_back(Arg
);
1035 SDValue ArgVal
= DAG
.getUNDEF(PtrVT
);
1036 CLI
.OutVals
.push_back(ArgVal
);
1040 // Analyze operands of the call, assigning locations to each operand.
1041 SmallVector
<CCValAssign
, 16> ArgLocs
;
1042 CCState
CCInfo(CallConv
, IsVarArg
, MF
, ArgLocs
, *DAG
.getContext());
1045 // Outgoing non-fixed arguments are placed in a buffer. First
1046 // compute their offsets and the total amount of buffer space needed.
1047 for (unsigned I
= NumFixedArgs
; I
< Outs
.size(); ++I
) {
1048 const ISD::OutputArg
&Out
= Outs
[I
];
1049 SDValue
&Arg
= OutVals
[I
];
1050 EVT VT
= Arg
.getValueType();
1051 assert(VT
!= MVT::iPTR
&& "Legalized args should be concrete");
1052 Type
*Ty
= VT
.getTypeForEVT(*DAG
.getContext());
1054 std::max(Out
.Flags
.getNonZeroOrigAlign(), Layout
.getABITypeAlign(Ty
));
1056 CCInfo
.AllocateStack(Layout
.getTypeAllocSize(Ty
), Alignment
);
1057 CCInfo
.addLoc(CCValAssign::getMem(ArgLocs
.size(), VT
.getSimpleVT(),
1058 Offset
, VT
.getSimpleVT(),
1059 CCValAssign::Full
));
1063 unsigned NumBytes
= CCInfo
.getAlignedCallFrameSize();
1066 if (IsVarArg
&& NumBytes
) {
1067 // For non-fixed arguments, next emit stores to store the argument values
1068 // to the stack buffer at the offsets computed above.
1069 int FI
= MF
.getFrameInfo().CreateStackObject(NumBytes
,
1070 Layout
.getStackAlignment(),
1073 SmallVector
<SDValue
, 8> Chains
;
1074 for (SDValue Arg
: drop_begin(OutVals
, NumFixedArgs
)) {
1075 assert(ArgLocs
[ValNo
].getValNo() == ValNo
&&
1076 "ArgLocs should remain in order and only hold varargs args");
1077 unsigned Offset
= ArgLocs
[ValNo
++].getLocMemOffset();
1078 FINode
= DAG
.getFrameIndex(FI
, getPointerTy(Layout
));
1079 SDValue Add
= DAG
.getNode(ISD::ADD
, DL
, PtrVT
, FINode
,
1080 DAG
.getConstant(Offset
, DL
, PtrVT
));
1082 DAG
.getStore(Chain
, DL
, Arg
, Add
,
1083 MachinePointerInfo::getFixedStack(MF
, FI
, Offset
)));
1085 if (!Chains
.empty())
1086 Chain
= DAG
.getNode(ISD::TokenFactor
, DL
, MVT::Other
, Chains
);
1087 } else if (IsVarArg
) {
1088 FINode
= DAG
.getIntPtrConstant(0, DL
);
1091 if (Callee
->getOpcode() == ISD::GlobalAddress
) {
1092 // If the callee is a GlobalAddress node (quite common, every direct call
1093 // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
1094 // doesn't at MO_GOT which is not needed for direct calls.
1095 GlobalAddressSDNode
* GA
= cast
<GlobalAddressSDNode
>(Callee
);
1096 Callee
= DAG
.getTargetGlobalAddress(GA
->getGlobal(), DL
,
1097 getPointerTy(DAG
.getDataLayout()),
1099 Callee
= DAG
.getNode(WebAssemblyISD::Wrapper
, DL
,
1100 getPointerTy(DAG
.getDataLayout()), Callee
);
1103 // Compute the operands for the CALLn node.
1104 SmallVector
<SDValue
, 16> Ops
;
1105 Ops
.push_back(Chain
);
1106 Ops
.push_back(Callee
);
1108 // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1110 Ops
.append(OutVals
.begin(),
1111 IsVarArg
? OutVals
.begin() + NumFixedArgs
: OutVals
.end());
1112 // Add a pointer to the vararg buffer.
1114 Ops
.push_back(FINode
);
1116 SmallVector
<EVT
, 8> InTys
;
1117 for (const auto &In
: Ins
) {
1118 assert(!In
.Flags
.isByVal() && "byval is not valid for return values");
1119 assert(!In
.Flags
.isNest() && "nest is not valid for return values");
1120 if (In
.Flags
.isInAlloca())
1121 fail(DL
, DAG
, "WebAssembly hasn't implemented inalloca return values");
1122 if (In
.Flags
.isInConsecutiveRegs())
1123 fail(DL
, DAG
, "WebAssembly hasn't implemented cons regs return values");
1124 if (In
.Flags
.isInConsecutiveRegsLast())
1126 "WebAssembly hasn't implemented cons regs last return values");
1127 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1129 InTys
.push_back(In
.VT
);
1132 // Lastly, if this is a call to a funcref we need to add an instruction
1133 // table.set to the chain and transform the call.
1134 if (CLI
.CB
&& isFuncrefType(CLI
.CB
->getCalledOperand()->getType())) {
1135 // In the absence of function references proposal where a funcref call is
1136 // lowered to call_ref, using reference types we generate a table.set to set
1137 // the funcref to a special table used solely for this purpose, followed by
1138 // a call_indirect. Here we just generate the table set, and return the
1139 // SDValue of the table.set so that LowerCall can finalize the lowering by
1140 // generating the call_indirect.
1141 SDValue Chain
= Ops
[0];
1143 MCSymbolWasm
*Table
= WebAssembly::getOrCreateFuncrefCallTableSymbol(
1144 MF
.getContext(), Subtarget
);
1145 SDValue Sym
= DAG
.getMCSymbol(Table
, PtrVT
);
1146 SDValue TableSlot
= DAG
.getConstant(0, DL
, MVT::i32
);
1147 SDValue TableSetOps
[] = {Chain
, Sym
, TableSlot
, Callee
};
1148 SDValue TableSet
= DAG
.getMemIntrinsicNode(
1149 WebAssemblyISD::TABLE_SET
, DL
, DAG
.getVTList(MVT::Other
), TableSetOps
,
1151 // Machine Mem Operand args
1152 MachinePointerInfo(WasmAddressSpace::FUNCREF
),
1153 CLI
.CB
->getCalledOperand()->getPointerAlignment(DAG
.getDataLayout()),
1154 MachineMemOperand::MOStore
);
1156 Ops
[0] = TableSet
; // The new chain is the TableSet itself
1159 if (CLI
.IsTailCall
) {
1160 // ret_calls do not return values to the current frame
1161 SDVTList NodeTys
= DAG
.getVTList(MVT::Other
, MVT::Glue
);
1162 return DAG
.getNode(WebAssemblyISD::RET_CALL
, DL
, NodeTys
, Ops
);
1165 InTys
.push_back(MVT::Other
);
1166 SDVTList InTyList
= DAG
.getVTList(InTys
);
1167 SDValue Res
= DAG
.getNode(WebAssemblyISD::CALL
, DL
, InTyList
, Ops
);
1169 for (size_t I
= 0; I
< Ins
.size(); ++I
)
1170 InVals
.push_back(Res
.getValue(I
));
1173 return Res
.getValue(Ins
.size());
1176 bool WebAssemblyTargetLowering::CanLowerReturn(
1177 CallingConv::ID
/*CallConv*/, MachineFunction
& /*MF*/, bool /*IsVarArg*/,
1178 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
1179 LLVMContext
& /*Context*/) const {
1180 // WebAssembly can only handle returning tuples with multivalue enabled
1181 return Subtarget
->hasMultivalue() || Outs
.size() <= 1;
1184 SDValue
WebAssemblyTargetLowering::LowerReturn(
1185 SDValue Chain
, CallingConv::ID CallConv
, bool /*IsVarArg*/,
1186 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
1187 const SmallVectorImpl
<SDValue
> &OutVals
, const SDLoc
&DL
,
1188 SelectionDAG
&DAG
) const {
1189 assert((Subtarget
->hasMultivalue() || Outs
.size() <= 1) &&
1190 "MVP WebAssembly can only return up to one value");
1191 if (!callingConvSupported(CallConv
))
1192 fail(DL
, DAG
, "WebAssembly doesn't support non-C calling conventions");
1194 SmallVector
<SDValue
, 4> RetOps(1, Chain
);
1195 RetOps
.append(OutVals
.begin(), OutVals
.end());
1196 Chain
= DAG
.getNode(WebAssemblyISD::RETURN
, DL
, MVT::Other
, RetOps
);
1198 // Record the number and types of the return values.
1199 for (const ISD::OutputArg
&Out
: Outs
) {
1200 assert(!Out
.Flags
.isByVal() && "byval is not valid for return values");
1201 assert(!Out
.Flags
.isNest() && "nest is not valid for return values");
1202 assert(Out
.IsFixed
&& "non-fixed return value is not valid");
1203 if (Out
.Flags
.isInAlloca())
1204 fail(DL
, DAG
, "WebAssembly hasn't implemented inalloca results");
1205 if (Out
.Flags
.isInConsecutiveRegs())
1206 fail(DL
, DAG
, "WebAssembly hasn't implemented cons regs results");
1207 if (Out
.Flags
.isInConsecutiveRegsLast())
1208 fail(DL
, DAG
, "WebAssembly hasn't implemented cons regs last results");
1214 SDValue
WebAssemblyTargetLowering::LowerFormalArguments(
1215 SDValue Chain
, CallingConv::ID CallConv
, bool IsVarArg
,
1216 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&DL
,
1217 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
) const {
1218 if (!callingConvSupported(CallConv
))
1219 fail(DL
, DAG
, "WebAssembly doesn't support non-C calling conventions");
1221 MachineFunction
&MF
= DAG
.getMachineFunction();
1222 auto *MFI
= MF
.getInfo
<WebAssemblyFunctionInfo
>();
1224 // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1225 // of the incoming values before they're represented by virtual registers.
1226 MF
.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS
);
1228 bool HasSwiftErrorArg
= false;
1229 bool HasSwiftSelfArg
= false;
1230 for (const ISD::InputArg
&In
: Ins
) {
1231 HasSwiftSelfArg
|= In
.Flags
.isSwiftSelf();
1232 HasSwiftErrorArg
|= In
.Flags
.isSwiftError();
1233 if (In
.Flags
.isInAlloca())
1234 fail(DL
, DAG
, "WebAssembly hasn't implemented inalloca arguments");
1235 if (In
.Flags
.isNest())
1236 fail(DL
, DAG
, "WebAssembly hasn't implemented nest arguments");
1237 if (In
.Flags
.isInConsecutiveRegs())
1238 fail(DL
, DAG
, "WebAssembly hasn't implemented cons regs arguments");
1239 if (In
.Flags
.isInConsecutiveRegsLast())
1240 fail(DL
, DAG
, "WebAssembly hasn't implemented cons regs last arguments");
1241 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1243 InVals
.push_back(In
.Used
? DAG
.getNode(WebAssemblyISD::ARGUMENT
, DL
, In
.VT
,
1244 DAG
.getTargetConstant(InVals
.size(),
1246 : DAG
.getUNDEF(In
.VT
));
1248 // Record the number and types of arguments.
1249 MFI
->addParam(In
.VT
);
1252 // For swiftcc, emit additional swiftself and swifterror arguments
1253 // if there aren't. These additional arguments are also added for callee
1254 // signature They are necessary to match callee and caller signature for
1256 auto PtrVT
= getPointerTy(MF
.getDataLayout());
1257 if (CallConv
== CallingConv::Swift
) {
1258 if (!HasSwiftSelfArg
) {
1259 MFI
->addParam(PtrVT
);
1261 if (!HasSwiftErrorArg
) {
1262 MFI
->addParam(PtrVT
);
1265 // Varargs are copied into a buffer allocated by the caller, and a pointer to
1266 // the buffer is passed as an argument.
1268 MVT PtrVT
= getPointerTy(MF
.getDataLayout());
1269 Register VarargVreg
=
1270 MF
.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT
));
1271 MFI
->setVarargBufferVreg(VarargVreg
);
1272 Chain
= DAG
.getCopyToReg(
1273 Chain
, DL
, VarargVreg
,
1274 DAG
.getNode(WebAssemblyISD::ARGUMENT
, DL
, PtrVT
,
1275 DAG
.getTargetConstant(Ins
.size(), DL
, MVT::i32
)));
1276 MFI
->addParam(PtrVT
);
1279 // Record the number and types of arguments and results.
1280 SmallVector
<MVT
, 4> Params
;
1281 SmallVector
<MVT
, 4> Results
;
1282 computeSignatureVTs(MF
.getFunction().getFunctionType(), &MF
.getFunction(),
1283 MF
.getFunction(), DAG
.getTarget(), Params
, Results
);
1284 for (MVT VT
: Results
)
1286 // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
1287 // the param logic here with ComputeSignatureVTs
1288 assert(MFI
->getParams().size() == Params
.size() &&
1289 std::equal(MFI
->getParams().begin(), MFI
->getParams().end(),
1295 void WebAssemblyTargetLowering::ReplaceNodeResults(
1296 SDNode
*N
, SmallVectorImpl
<SDValue
> &Results
, SelectionDAG
&DAG
) const {
1297 switch (N
->getOpcode()) {
1298 case ISD::SIGN_EXTEND_INREG
:
1299 // Do not add any results, signifying that N should not be custom lowered
1300 // after all. This happens because simd128 turns on custom lowering for
1301 // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1306 "ReplaceNodeResults not implemented for this op for WebAssembly!");
1310 //===----------------------------------------------------------------------===//
1311 // Custom lowering hooks.
1312 //===----------------------------------------------------------------------===//
1314 SDValue
WebAssemblyTargetLowering::LowerOperation(SDValue Op
,
1315 SelectionDAG
&DAG
) const {
1317 switch (Op
.getOpcode()) {
1319 llvm_unreachable("unimplemented operation lowering");
1321 case ISD::FrameIndex
:
1322 return LowerFrameIndex(Op
, DAG
);
1323 case ISD::GlobalAddress
:
1324 return LowerGlobalAddress(Op
, DAG
);
1325 case ISD::GlobalTLSAddress
:
1326 return LowerGlobalTLSAddress(Op
, DAG
);
1327 case ISD::ExternalSymbol
:
1328 return LowerExternalSymbol(Op
, DAG
);
1329 case ISD::JumpTable
:
1330 return LowerJumpTable(Op
, DAG
);
1332 return LowerBR_JT(Op
, DAG
);
1334 return LowerVASTART(Op
, DAG
);
1335 case ISD::BlockAddress
:
1337 fail(DL
, DAG
, "WebAssembly hasn't implemented computed gotos");
1339 case ISD::RETURNADDR
:
1340 return LowerRETURNADDR(Op
, DAG
);
1341 case ISD::FRAMEADDR
:
1342 return LowerFRAMEADDR(Op
, DAG
);
1343 case ISD::CopyToReg
:
1344 return LowerCopyToReg(Op
, DAG
);
1345 case ISD::EXTRACT_VECTOR_ELT
:
1346 case ISD::INSERT_VECTOR_ELT
:
1347 return LowerAccessVectorElement(Op
, DAG
);
1348 case ISD::INTRINSIC_VOID
:
1349 case ISD::INTRINSIC_WO_CHAIN
:
1350 case ISD::INTRINSIC_W_CHAIN
:
1351 return LowerIntrinsic(Op
, DAG
);
1352 case ISD::SIGN_EXTEND_INREG
:
1353 return LowerSIGN_EXTEND_INREG(Op
, DAG
);
1354 case ISD::BUILD_VECTOR
:
1355 return LowerBUILD_VECTOR(Op
, DAG
);
1356 case ISD::VECTOR_SHUFFLE
:
1357 return LowerVECTOR_SHUFFLE(Op
, DAG
);
1359 return LowerSETCC(Op
, DAG
);
1363 return LowerShift(Op
, DAG
);
1364 case ISD::FP_TO_SINT_SAT
:
1365 case ISD::FP_TO_UINT_SAT
:
1366 return LowerFP_TO_INT_SAT(Op
, DAG
);
1368 return LowerLoad(Op
, DAG
);
1370 return LowerStore(Op
, DAG
);
1374 static bool IsWebAssemblyGlobal(SDValue Op
) {
1375 if (const GlobalAddressSDNode
*GA
= dyn_cast
<GlobalAddressSDNode
>(Op
))
1376 return WebAssembly::isWasmVarAddressSpace(GA
->getAddressSpace());
1381 static Optional
<unsigned> IsWebAssemblyLocal(SDValue Op
, SelectionDAG
&DAG
) {
1382 const FrameIndexSDNode
*FI
= dyn_cast
<FrameIndexSDNode
>(Op
);
1386 auto &MF
= DAG
.getMachineFunction();
1387 return WebAssemblyFrameLowering::getLocalForStackObject(MF
, FI
->getIndex());
1390 bool WebAssemblyTargetLowering::isFuncrefType(const Type
*Ty
) {
1391 return isa
<PointerType
>(Ty
) &&
1392 Ty
->getPointerAddressSpace() == WasmAddressSpace::FUNCREF
;
1395 bool WebAssemblyTargetLowering::isExternrefType(const Type
*Ty
) {
1396 return isa
<PointerType
>(Ty
) &&
1397 Ty
->getPointerAddressSpace() == WasmAddressSpace::EXTERNREF
;
1400 SDValue
WebAssemblyTargetLowering::LowerStore(SDValue Op
,
1401 SelectionDAG
&DAG
) const {
1403 StoreSDNode
*SN
= cast
<StoreSDNode
>(Op
.getNode());
1404 const SDValue
&Value
= SN
->getValue();
1405 const SDValue
&Base
= SN
->getBasePtr();
1406 const SDValue
&Offset
= SN
->getOffset();
1408 if (IsWebAssemblyGlobal(Base
)) {
1409 if (!Offset
->isUndef())
1410 report_fatal_error("unexpected offset when storing to webassembly global",
1413 SDVTList Tys
= DAG
.getVTList(MVT::Other
);
1414 SDValue Ops
[] = {SN
->getChain(), Value
, Base
};
1415 return DAG
.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET
, DL
, Tys
, Ops
,
1416 SN
->getMemoryVT(), SN
->getMemOperand());
1419 if (Optional
<unsigned> Local
= IsWebAssemblyLocal(Base
, DAG
)) {
1420 if (!Offset
->isUndef())
1421 report_fatal_error("unexpected offset when storing to webassembly local",
1424 SDValue Idx
= DAG
.getTargetConstant(*Local
, Base
, MVT::i32
);
1425 SDVTList Tys
= DAG
.getVTList(MVT::Other
); // The chain.
1426 SDValue Ops
[] = {SN
->getChain(), Idx
, Value
};
1427 return DAG
.getNode(WebAssemblyISD::LOCAL_SET
, DL
, Tys
, Ops
);
1433 SDValue
WebAssemblyTargetLowering::LowerLoad(SDValue Op
,
1434 SelectionDAG
&DAG
) const {
1436 LoadSDNode
*LN
= cast
<LoadSDNode
>(Op
.getNode());
1437 const SDValue
&Base
= LN
->getBasePtr();
1438 const SDValue
&Offset
= LN
->getOffset();
1440 if (IsWebAssemblyGlobal(Base
)) {
1441 if (!Offset
->isUndef())
1443 "unexpected offset when loading from webassembly global", false);
1445 SDVTList Tys
= DAG
.getVTList(LN
->getValueType(0), MVT::Other
);
1446 SDValue Ops
[] = {LN
->getChain(), Base
};
1447 return DAG
.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET
, DL
, Tys
, Ops
,
1448 LN
->getMemoryVT(), LN
->getMemOperand());
1451 if (Optional
<unsigned> Local
= IsWebAssemblyLocal(Base
, DAG
)) {
1452 if (!Offset
->isUndef())
1454 "unexpected offset when loading from webassembly local", false);
1456 SDValue Idx
= DAG
.getTargetConstant(*Local
, Base
, MVT::i32
);
1457 EVT LocalVT
= LN
->getValueType(0);
1458 SDValue LocalGet
= DAG
.getNode(WebAssemblyISD::LOCAL_GET
, DL
, LocalVT
,
1459 {LN
->getChain(), Idx
});
1460 SDValue Result
= DAG
.getMergeValues({LocalGet
, LN
->getChain()}, DL
);
1461 assert(Result
->getNumValues() == 2 && "Loads must carry a chain!");
1468 SDValue
WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op
,
1469 SelectionDAG
&DAG
) const {
1470 SDValue Src
= Op
.getOperand(2);
1471 if (isa
<FrameIndexSDNode
>(Src
.getNode())) {
1472 // CopyToReg nodes don't support FrameIndex operands. Other targets select
1473 // the FI to some LEA-like instruction, but since we don't have that, we
1474 // need to insert some kind of instruction that can take an FI operand and
1475 // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1476 // local.copy between Op and its FI operand.
1477 SDValue Chain
= Op
.getOperand(0);
1479 unsigned Reg
= cast
<RegisterSDNode
>(Op
.getOperand(1))->getReg();
1480 EVT VT
= Src
.getValueType();
1481 SDValue
Copy(DAG
.getMachineNode(VT
== MVT::i32
? WebAssembly::COPY_I32
1482 : WebAssembly::COPY_I64
,
1485 return Op
.getNode()->getNumValues() == 1
1486 ? DAG
.getCopyToReg(Chain
, DL
, Reg
, Copy
)
1487 : DAG
.getCopyToReg(Chain
, DL
, Reg
, Copy
,
1488 Op
.getNumOperands() == 4 ? Op
.getOperand(3)
1494 SDValue
WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op
,
1495 SelectionDAG
&DAG
) const {
1496 int FI
= cast
<FrameIndexSDNode
>(Op
)->getIndex();
1497 return DAG
.getTargetFrameIndex(FI
, Op
.getValueType());
SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);

  if (!Subtarget->getTargetTriple().isOSEmscripten()) {
    fail(DL, DAG,
         "Non-Emscripten WebAssembly hasn't implemented "
         "__builtin_return_address");
    return SDValue();
  }

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  unsigned Depth = Op.getConstantOperandVal(0);
  MakeLibCallOptions CallOptions;
  return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
                     {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
      .first;
}

SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Non-zero depths are not supported by WebAssembly currently. Use the
  // legalizer's default expansion, which is to return 0 (what this function is
  // documented to do).
  if (Op.getConstantOperandVal(0) > 0)
    return SDValue();

  DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
  EVT VT = Op.getValueType();
  Register FP =
      Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
}

SDValue
WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *GA = cast<GlobalAddressSDNode>(Op);
  MVT PtrVT = getPointerTy(DAG.getDataLayout());

  MachineFunction &MF = DAG.getMachineFunction();
  if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
    report_fatal_error("cannot use thread-local storage without bulk memory",
                       false);

  const GlobalValue *GV = GA->getGlobal();

  // Currently Emscripten does not support dynamic linking with threads.
  // Therefore, if we have thread-local storage, only the local-exec model
  // is possible.
  // TODO: remove this and implement proper TLS models once Emscripten
  // supports dynamic linking with threads.
  if (GV->getThreadLocalMode() != GlobalValue::LocalExecTLSModel &&
      !Subtarget->getTargetTriple().isOSEmscripten()) {
    report_fatal_error("only -ftls-model=local-exec is supported for now on "
                       "non-Emscripten OSes: variable " +
                           GV->getName(),
                       false);
  }

  auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
                                     : WebAssembly::GLOBAL_GET_I32;
  const char *BaseName = MF.createExternalSymbolName("__tls_base");

  SDValue BaseAddr(
      DAG.getMachineNode(GlobalGet, DL, PtrVT,
                         DAG.getTargetExternalSymbol(BaseName, PtrVT)),
      0);

  SDValue TLSOffset = DAG.getTargetGlobalAddress(
      GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
  SDValue SymAddr = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, TLSOffset);

  return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
}

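// For illustration, a local-exec TLS access lowered above ends up roughly as
//   global.get __tls_base
//   i32.const <offset of the variable relative to __tls_base>
//   i32.add
// (or the i64 equivalents on wasm64), with the constant relocated against
// __tls_base via MO_TLS_BASE_REL.
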
SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *GA = cast<GlobalAddressSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(GA->getTargetFlags() == 0 &&
         "Unexpected target flags on generic GlobalAddressSDNode");
  if (!WebAssembly::isValidAddressSpace(GA->getAddressSpace()))
    fail(DL, DAG, "Invalid address space for WebAssembly target");

  unsigned OperandFlags = 0;
  if (isPositionIndependent()) {
    const GlobalValue *GV = GA->getGlobal();
    if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) {
      MachineFunction &MF = DAG.getMachineFunction();
      MVT PtrVT = getPointerTy(MF.getDataLayout());
      const char *BaseName;
      if (GV->getValueType()->isFunctionTy()) {
        BaseName = MF.createExternalSymbolName("__table_base");
        OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
      } else {
        BaseName = MF.createExternalSymbolName("__memory_base");
        OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
      }
      SDValue BaseAddr =
          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                      DAG.getTargetExternalSymbol(BaseName, PtrVT));

      SDValue SymAddr = DAG.getNode(
          WebAssemblyISD::WrapperPIC, DL, VT,
          DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
                                     OperandFlags));

      return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
    }
    OperandFlags = WebAssemblyII::MO_GOT;
  }

  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
                                                GA->getOffset(), OperandFlags));
}

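// For illustration, in PIC mode a DSO-local data symbol lowered above becomes
// roughly (__memory_base + <symbol offset>), a DSO-local function symbol
// becomes (__table_base + <symbol offset>), and non-DSO-local symbols are
// instead addressed indirectly through a GOT-style import via MO_GOT.
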
SDValue
WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *ES = cast<ExternalSymbolSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(ES->getTargetFlags() == 0 &&
         "Unexpected target flags on generic ExternalSymbolSDNode");
  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
}

SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // There's no need for a Wrapper node because we always incorporate a jump
  // table operand into a BR_TABLE instruction, rather than ever
  // materializing it in a register.
  const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
                                JT->getTargetFlags());
}

SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
  SDValue Index = Op.getOperand(2);
  assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Index);

  MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
  const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;

  // Add an operand for each case.
  for (auto MBB : MBBs)
    Ops.push_back(DAG.getBasicBlock(MBB));

  // Add the first MBB as a dummy default target for now. This will be replaced
  // with the proper default target (and the preceding range check eliminated)
  // if possible by WebAssemblyFixBrTableDefaults.
  Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
  return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
}

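// For illustration, a jump table with cases BB0..BB2 is lowered above to
// roughly
//   BR_TABLE %index, %BB0, %BB1, %BB2, %BB0
// where the trailing %BB0 is the placeholder default that
// WebAssemblyFixBrTableDefaults later replaces with the real default target.
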
SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());

  auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
                                    MFI->getVarargBufferVreg(), PtrVT);
  return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
                                                  SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned IntNo;
  switch (Op.getOpcode()) {
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN:
    IntNo = Op.getConstantOperandVal(1);
    break;
  case ISD::INTRINSIC_WO_CHAIN:
    IntNo = Op.getConstantOperandVal(0);
    break;
  default:
    llvm_unreachable("Invalid intrinsic");
  }
  SDLoc DL(Op);

  switch (IntNo) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.

  case Intrinsic::wasm_lsda: {
    EVT VT = Op.getValueType();
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
    auto &Context = MF.getMMI().getContext();
    MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
                                            Twine(MF.getFunctionNumber()));
    return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                       DAG.getMCSymbol(S, PtrVT));
  }

  case Intrinsic::wasm_shuffle: {
    // Drop in-chain and replace undefs, but otherwise pass through unchanged
    SDValue Ops[18];
    size_t OpIdx = 0;
    Ops[OpIdx++] = Op.getOperand(1);
    Ops[OpIdx++] = Op.getOperand(2);
    while (OpIdx < 18) {
      const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
      if (MaskIdx.isUndef() ||
          cast<ConstantSDNode>(MaskIdx.getNode())->getZExtValue() >= 32) {
        Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32);
      } else {
        Ops[OpIdx++] = MaskIdx;
      }
    }
    return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
  }
  }
}

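// For illustration, in the wasm_shuffle case above any mask operand that is
// undef or >= 32 is replaced by i32 0, so e.g. a mask beginning
//   <0, undef, 35, 3, ...>
// is materialized as the SHUFFLE operands <0, 0, 0, 3, ...>.
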
SDValue
WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // If sign extension operations are disabled, allow sext_inreg only if operand
  // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
  // extension operations, but allowing sext_inreg in this context lets us have
  // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
  // everywhere would be simpler in this file, but would necessitate large and
  // brittle patterns to undo the expansion and select extract_lane_s
  // instructions.
  assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
  if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  const SDValue &Extract = Op.getOperand(0);
  MVT VecT = Extract.getOperand(0).getSimpleValueType();
  if (VecT.getVectorElementType().getSizeInBits() > 32)
    return SDValue();
  MVT ExtractedLaneT =
      cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
  MVT ExtractedVecT =
      MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
  if (ExtractedVecT == VecT)
    return Op;

  // Bitcast vector to appropriate type to ensure ISel pattern coverage
  const SDNode *Index = Extract.getOperand(1).getNode();
  if (!isa<ConstantSDNode>(Index))
    return SDValue();
  unsigned IndexVal = cast<ConstantSDNode>(Index)->getZExtValue();
  unsigned Scale =
      ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
  assert(Scale > 1);
  SDValue NewIndex =
      DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
  SDValue NewExtract = DAG.getNode(
      ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
      DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
                     Op.getOperand(1));
}

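// For illustration, with sign-ext disabled and SIMD enabled, the bitcast above
// turns roughly
//   (sext_inreg (extract_vector_elt (v4i32 $v), 1), i8)
// into
//   (sext_inreg (extract_vector_elt (v16i8 (bitcast $v)), 4), i8)
// so the existing extract_lane_s patterns can match the i8 lane directly.
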
static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  if (Op.getValueType() != MVT::v2f64)
    return SDValue();

  auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
                             unsigned &Index) -> bool {
    switch (Op.getOpcode()) {
    case ISD::SINT_TO_FP:
      Opcode = WebAssemblyISD::CONVERT_LOW_S;
      break;
    case ISD::UINT_TO_FP:
      Opcode = WebAssemblyISD::CONVERT_LOW_U;
      break;
    case ISD::FP_EXTEND:
      Opcode = WebAssemblyISD::PROMOTE_LOW;
      break;
    default:
      return false;
    }

    auto ExtractVector = Op.getOperand(0);
    if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return false;

    if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
      return false;

    SrcVec = ExtractVector.getOperand(0);
    Index = ExtractVector.getConstantOperandVal(1);
    return true;
  };

  unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
  SDValue LHSSrcVec, RHSSrcVec;
  if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
      !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
    return SDValue();

  if (LHSOpcode != RHSOpcode)
    return SDValue();

  MVT ExpectedSrcVT;
  switch (LHSOpcode) {
  case WebAssemblyISD::CONVERT_LOW_S:
  case WebAssemblyISD::CONVERT_LOW_U:
    ExpectedSrcVT = MVT::v4i32;
    break;
  case WebAssemblyISD::PROMOTE_LOW:
    ExpectedSrcVT = MVT::v4f32;
    break;
  }
  if (LHSSrcVec.getValueType() != ExpectedSrcVT)
    return SDValue();

  auto Src = LHSSrcVec;
  if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
    // Shuffle the source vector so that the converted lanes are the low lanes.
    Src = DAG.getVectorShuffle(
        ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
        {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
  }
  return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
}

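// For illustration, LowerConvertLow above matches e.g.
//   (build_vector (f64 (sint_to_fp (extract_vector_elt (v4i32 $x), 0))),
//                 (f64 (sint_to_fp (extract_vector_elt (v4i32 $x), 1))))
// and emits roughly (f64x2.convert_low_i32x4_s $x), first shuffling the
// source if the converted lanes are not already lanes 0 and 1.
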
SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {
  if (auto ConvertLow = LowerConvertLow(Op, DAG))
    return ConvertLow;

  SDLoc DL(Op);
  const EVT VecT = Op.getValueType();
  const EVT LaneT = Op.getOperand(0).getValueType();
  const size_t Lanes = Op.getNumOperands();
  bool CanSwizzle = VecT == MVT::v16i8;

  // BUILD_VECTORs are lowered to the instruction that initializes the highest
  // possible number of lanes at once followed by a sequence of replace_lane
  // instructions to individually initialize any remaining lanes.

  // TODO: Tune this. For example, lanewise swizzling is very expensive, so
  // swizzled lanes should be given greater weight.

  // TODO: Investigate looping rather than always extracting/replacing specific
  // lanes to fill gaps.

  auto IsConstant = [](const SDValue &V) {
    return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
  };
  // Returns the source vector and index vector pair if they exist. Checks for:
  //   (extract_vector_elt
  //     $src,
  //     (sign_extend_inreg (extract_vector_elt $indices, $i))
  //   )
  auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
    auto Bail = std::make_pair(SDValue(), SDValue());
    if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleSrc = Lane->getOperand(0);
    const SDValue &IndexExt = Lane->getOperand(1);
    if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
      return Bail;
    const SDValue &Index = IndexExt->getOperand(0);
    if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleIndices = Index->getOperand(0);
    if (SwizzleSrc.getValueType() != MVT::v16i8 ||
        SwizzleIndices.getValueType() != MVT::v16i8 ||
        Index->getOperand(1)->getOpcode() != ISD::Constant ||
        Index->getConstantOperandVal(1) != I)
      return Bail;
    return std::make_pair(SwizzleSrc, SwizzleIndices);
  };

  // If the lane is extracted from another vector at a constant index, return
  // that vector. The source vector must not have more lanes than the dest
  // because the shufflevector indices are in terms of the destination lanes and
  // would not be able to address the smaller individual source lanes.
  auto GetShuffleSrc = [&](const SDValue &Lane) {
    if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return SDValue();
    if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
      return SDValue();
    if (Lane->getOperand(0).getValueType().getVectorNumElements() >
        VecT.getVectorNumElements())
      return SDValue();
    return Lane->getOperand(0);
  };
  using ValueEntry = std::pair<SDValue, size_t>;
  SmallVector<ValueEntry, 16> SplatValueCounts;

  using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
  SmallVector<SwizzleEntry, 16> SwizzleCounts;

  using ShuffleEntry = std::pair<SDValue, size_t>;
  SmallVector<ShuffleEntry, 16> ShuffleCounts;

  auto AddCount = [](auto &Counts, const auto &Val) {
    auto CountIt =
        llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
    if (CountIt == Counts.end()) {
      Counts.emplace_back(Val, 1);
    } else {
      CountIt->second++;
    }
  };

  auto GetMostCommon = [](auto &Counts) {
    auto CommonIt =
        std::max_element(Counts.begin(), Counts.end(),
                         [](auto A, auto B) { return A.second < B.second; });
    assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
    return *CommonIt;
  };
  size_t NumConstantLanes = 0;

  // Count eligible lanes for each type of vector creation op
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (Lane.isUndef())
      continue;

    AddCount(SplatValueCounts, Lane);

    if (IsConstant(Lane))
      NumConstantLanes++;
    if (auto ShuffleSrc = GetShuffleSrc(Lane))
      AddCount(ShuffleCounts, ShuffleSrc);
    if (CanSwizzle) {
      auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
      if (SwizzleSrcs.first)
        AddCount(SwizzleCounts, SwizzleSrcs);
    }
  }

  SDValue SplatValue;
  size_t NumSplatLanes;
  std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);

  SDValue SwizzleSrc;
  SDValue SwizzleIndices;
  size_t NumSwizzleLanes = 0;
  if (SwizzleCounts.size())
    std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
                          NumSwizzleLanes) = GetMostCommon(SwizzleCounts);

  // Shuffles can draw from up to two vectors, so find the two most common
  // sources.
  SDValue ShuffleSrc1, ShuffleSrc2;
  size_t NumShuffleLanes = 0;
  if (ShuffleCounts.size()) {
    std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
    ShuffleCounts.erase(std::remove_if(ShuffleCounts.begin(),
                                       ShuffleCounts.end(),
                                       [&](const auto &Pair) {
                                         return Pair.first == ShuffleSrc1;
                                       }),
                        ShuffleCounts.end());
  }
  if (ShuffleCounts.size()) {
    size_t AdditionalShuffleLanes;
    std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
        GetMostCommon(ShuffleCounts);
    NumShuffleLanes += AdditionalShuffleLanes;
  }

  // Predicate returning true if the lane is properly initialized by the
  // original instruction
  std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
  SDValue Result;
  // Prefer swizzles over shuffles over vector consts over splats
  if (NumSwizzleLanes >= NumShuffleLanes &&
      NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
    Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
                         SwizzleIndices);
    auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
    IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
      return Swizzled == GetSwizzleSrcs(I, Lane);
    };
  } else if (NumShuffleLanes >= NumConstantLanes &&
             NumShuffleLanes >= NumSplatLanes) {
    size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
    size_t DestLaneCount = VecT.getVectorNumElements();
    size_t Scale1 = 1;
    size_t Scale2 = 1;
    SDValue Src1 = ShuffleSrc1;
    SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
    if (Src1.getValueType() != VecT) {
      size_t LaneSize =
          Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
      assert(LaneSize > DestLaneSize);
      Scale1 = LaneSize / DestLaneSize;
      Src1 = DAG.getBitcast(VecT, Src1);
    }
    if (Src2.getValueType() != VecT) {
      size_t LaneSize =
          Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
      assert(LaneSize > DestLaneSize);
      Scale2 = LaneSize / DestLaneSize;
      Src2 = DAG.getBitcast(VecT, Src2);
    }

    int Mask[16];
    assert(DestLaneCount <= 16);
    for (size_t I = 0; I < DestLaneCount; ++I) {
      const SDValue &Lane = Op->getOperand(I);
      SDValue Src = GetShuffleSrc(Lane);
      if (Src == ShuffleSrc1) {
        Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
      } else if (Src && Src == ShuffleSrc2) {
        Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
      } else {
        Mask[I] = -1;
      }
    }
    ArrayRef<int> MaskRef(Mask, DestLaneCount);
    Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
    IsLaneConstructed = [&](size_t, const SDValue &Lane) {
      auto Src = GetShuffleSrc(Lane);
      return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
    };
  } else if (NumConstantLanes >= NumSplatLanes) {
    SmallVector<SDValue, 16> ConstLanes;
    for (const SDValue &Lane : Op->op_values()) {
      if (IsConstant(Lane)) {
        ConstLanes.push_back(Lane);
      } else if (LaneT.isFloatingPoint()) {
        ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
      } else {
        ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
      }
    }
    Result = DAG.getBuildVector(VecT, DL, ConstLanes);
    IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
      return IsConstant(Lane);
    };
  } else {
    // Use a splat, but possibly a load_splat
    LoadSDNode *SplattedLoad;
    if ((SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
        SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
      Result = DAG.getMemIntrinsicNode(
          WebAssemblyISD::LOAD_SPLAT, DL, DAG.getVTList(VecT),
          {SplattedLoad->getChain(), SplattedLoad->getBasePtr(),
           SplattedLoad->getOffset()},
          SplattedLoad->getMemoryVT(), SplattedLoad->getMemOperand());
    } else {
      Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
    }
    IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
      return Lane == SplatValue;
    };
  }

  assert(Result);
  assert(IsLaneConstructed);
  // Add replace_lane instructions for any unhandled values
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
                           DAG.getConstant(I, DL, MVT::i32));
  }

  return Result;
}

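// For illustration, a v16i8 build_vector whose lanes are mostly
//   (extract_vector_elt $src, (sign_extend_inreg (extract_vector_elt $idx, I)))
// is lowered above to roughly (i8x16.swizzle $src, $idx) followed by a
// replace_lane for each lane that does not fit the swizzle pattern; the
// shuffle, constant-vector, and splat strategies are chosen the same way when
// they cover the most lanes.
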
SDValue
WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
  MVT VecType = Op.getOperand(0).getSimpleValueType();
  assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
  size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;

  // Space for two vector args and sixteen mask indices
  SDValue Ops[18];
  size_t OpIdx = 0;
  Ops[OpIdx++] = Op.getOperand(0);
  Ops[OpIdx++] = Op.getOperand(1);

  // Expand mask indices to byte indices and materialize them as operands
  for (int M : Mask) {
    for (size_t J = 0; J < LaneBytes; ++J) {
      // Lower undefs (represented by -1 in mask) to zero
      uint64_t ByteIndex = M == -1 ? 0 : (uint64_t)M * LaneBytes + J;
      Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
    }
  }

  return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
}

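// For illustration, a v4i32 shuffle lane index of 1 expands above into the
// byte indices 4, 5, 6, 7 of the i8x16.shuffle immediate, and an undef lane
// (-1 in the mask) expands into zero bytes.
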
SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // The legalizer does not know how to expand the unsupported comparison modes
  // of i64x2 vectors, so we manually unroll them here.
  assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
  SmallVector<SDValue, 2> LHS, RHS;
  DAG.ExtractVectorElements(Op->getOperand(0), LHS);
  DAG.ExtractVectorElements(Op->getOperand(1), RHS);
  const SDValue &CC = Op->getOperand(2);
  auto MakeLane = [&](unsigned I) {
    return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
                       DAG.getConstant(uint64_t(-1), DL, MVT::i64),
                       DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
  };
  return DAG.getBuildVector(Op->getValueType(0), DL,
                            {MakeLane(0), MakeLane(1)});
}

SDValue
WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Allow constant lane indices, expand variable lane indices
  SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
  if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef())
    return Op;

  // Perform default expansion
  return SDValue();
}

static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
  EVT LaneT = Op.getSimpleValueType().getVectorElementType();
  // 32-bit and 64-bit unrolled shifts will have proper semantics
  if (LaneT.bitsGE(MVT::i32))
    return DAG.UnrollVectorOp(Op.getNode());
  // Otherwise mask the shift value to get proper semantics from 32-bit shift
  SDLoc DL(Op);
  size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
  SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
  unsigned ShiftOpcode = Op.getOpcode();
  SmallVector<SDValue, 16> ShiftedElements;
  DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
  SmallVector<SDValue, 16> ShiftElements;
  DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
  SmallVector<SDValue, 16> UnrolledOps;
  for (size_t i = 0; i < NumLanes; ++i) {
    SDValue MaskedShiftValue =
        DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
    SDValue ShiftedValue = ShiftedElements[i];
    if (ShiftOpcode == ISD::SRA)
      ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
                                 ShiftedValue, DAG.getValueType(LaneT));
    UnrolledOps.push_back(
        DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
  }
  return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
}

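// For illustration, when unrolling an i8x16 shift above, each shift amount is
// masked with 7 (the lane width minus one), so e.g. a shift by 19 behaves as a
// shift by 3, preserving wasm's modulo-lane-width shift semantics once the
// lanes are widened to i32.
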
SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Only manually lower vector shifts
  assert(Op.getSimpleValueType().isVector());

  auto ShiftVal = DAG.getSplatValue(Op.getOperand(1));
  if (!ShiftVal)
    return unrollVectorShift(Op, DAG);

  // Use anyext because none of the high bits can affect the shift
  ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);

  unsigned Opcode;
  switch (Op.getOpcode()) {
  case ISD::SHL:
    Opcode = WebAssemblyISD::VEC_SHL;
    break;
  case ISD::SRA:
    Opcode = WebAssemblyISD::VEC_SHR_S;
    break;
  case ISD::SRL:
    Opcode = WebAssemblyISD::VEC_SHR_U;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }

  return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
}

SDValue
WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT ResT = Op.getValueType();
  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

  if ((ResT == MVT::i32 || ResT == MVT::i64) &&
      (SatVT == MVT::i32 || SatVT == MVT::i64))
    return Op;

  if (ResT == MVT::v4i32 && SatVT == MVT::i32)
    return Op;

  return SDValue();
}

//===----------------------------------------------------------------------===//
// Custom DAG combine hooks
//===----------------------------------------------------------------------===//

static SDValue
performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;
  auto Shuffle = cast<ShuffleVectorSDNode>(N);

  // Hoist vector bitcasts that don't change the number of lanes out of unary
  // shuffles, where they are less likely to get in the way of other combines.
  // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
  //   (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
  SDValue Bitcast = N->getOperand(0);
  if (Bitcast.getOpcode() != ISD::BITCAST)
    return SDValue();
  if (!N->getOperand(1).isUndef())
    return SDValue();
  SDValue CastOp = Bitcast.getOperand(0);
  MVT SrcType = CastOp.getSimpleValueType();
  MVT DstType = Bitcast.getSimpleValueType();
  if (!SrcType.is128BitVector() ||
      SrcType.getVectorNumElements() != DstType.getVectorNumElements())
    return SDValue();
  SDValue NewShuffle = DAG.getVectorShuffle(
      SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
  return DAG.getBitcast(DstType, NewShuffle);
}

static SDValue
performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;
  assert(N->getOpcode() == ISD::SIGN_EXTEND ||
         N->getOpcode() == ISD::ZERO_EXTEND);

  // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
  // possible before the extract_subvector can be expanded.
  auto Extract = N->getOperand(0);
  if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return SDValue();
  auto Source = Extract.getOperand(0);
  auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
  if (IndexNode == nullptr)
    return SDValue();
  auto Index = IndexNode->getZExtValue();

  // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
  // extracted subvector is the low or high half of its source.
  EVT ResVT = N->getValueType(0);
  if (ResVT == MVT::v8i16) {
    if (Extract.getValueType() != MVT::v8i8 ||
        Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
      return SDValue();
  } else if (ResVT == MVT::v4i32) {
    if (Extract.getValueType() != MVT::v4i16 ||
        Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
      return SDValue();
  } else if (ResVT == MVT::v2i64) {
    if (Extract.getValueType() != MVT::v2i32 ||
        Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
      return SDValue();
  } else {
    return SDValue();
  }

  bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
  bool IsLow = Index == 0;

  unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
                                : WebAssemblyISD::EXTEND_HIGH_S)
                       : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
                                : WebAssemblyISD::EXTEND_HIGH_U);

  return DAG.getNode(Op, SDLoc(N), ResVT, Source);
}

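// For illustration, the combine above turns roughly
//   (v8i16 (sign_extend (extract_subvector (v16i8 $x), 8)))
// into (i16x8.extend_high_i8x16_s $x), i.e. EXTEND_HIGH_S, before the
// extract_subvector can be expanded into something harder to match.
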
static SDValue
performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;

  auto GetWasmConversionOp = [](unsigned Op) {
    switch (Op) {
    case ISD::FP_TO_SINT_SAT:
      return WebAssemblyISD::TRUNC_SAT_ZERO_S;
    case ISD::FP_TO_UINT_SAT:
      return WebAssemblyISD::TRUNC_SAT_ZERO_U;
    case ISD::FP_ROUND:
      return WebAssemblyISD::DEMOTE_ZERO;
    }
    llvm_unreachable("unexpected op");
  };

  auto IsZeroSplat = [](SDValue SplatVal) {
    auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
    APInt SplatValue, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    return Splat &&
           Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                                  HasAnyUndefs) &&
           SplatValue == 0;
  };

  if (N->getOpcode() == ISD::CONCAT_VECTORS) {
    // Combine this:
    //
    //   (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
    //
    // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
    //
    // Or this:
    //
    //   (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
    //
    // into (f32x4.demote_zero_f64x2 $x).
    EVT ResVT;
    EVT ExpectedConversionType;
    auto Conversion = N->getOperand(0);
    auto ConversionOp = Conversion.getOpcode();
    switch (ConversionOp) {
    case ISD::FP_TO_SINT_SAT:
    case ISD::FP_TO_UINT_SAT:
      ResVT = MVT::v4i32;
      ExpectedConversionType = MVT::v2i32;
      break;
    case ISD::FP_ROUND:
      ResVT = MVT::v4f32;
      ExpectedConversionType = MVT::v2f32;
      break;
    default:
      return SDValue();
    }

    if (N->getValueType(0) != ResVT)
      return SDValue();

    if (Conversion.getValueType() != ExpectedConversionType)
      return SDValue();

    auto Source = Conversion.getOperand(0);
    if (Source.getValueType() != MVT::v2f64)
      return SDValue();

    if (!IsZeroSplat(N->getOperand(1)) ||
        N->getOperand(1).getValueType() != ExpectedConversionType)
      return SDValue();

    unsigned Op = GetWasmConversionOp(ConversionOp);
    return DAG.getNode(Op, SDLoc(N), ResVT, Source);
  }

  // Combine this:
  //
  //   (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
  //
  // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
  //
  // Or this:
  //
  //   (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
  //
  // into (f32x4.demote_zero_f64x2 $x).
  EVT ResVT;
  auto ConversionOp = N->getOpcode();
  switch (ConversionOp) {
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    ResVT = MVT::v4i32;
    break;
  case ISD::FP_ROUND:
    ResVT = MVT::v4f32;
    break;
  default:
    llvm_unreachable("unexpected op");
  }

  if (N->getValueType(0) != ResVT)
    return SDValue();

  auto Concat = N->getOperand(0);
  if (Concat.getValueType() != MVT::v4f64)
    return SDValue();

  auto Source = Concat.getOperand(0);
  if (Source.getValueType() != MVT::v2f64)
    return SDValue();

  if (!IsZeroSplat(Concat.getOperand(1)) ||
      Concat.getOperand(1).getValueType() != MVT::v2f64)
    return SDValue();

  unsigned Op = GetWasmConversionOp(ConversionOp);
  return DAG.getNode(Op, SDLoc(N), ResVT, Source);
}

SDValue
WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  switch (N->getOpcode()) {
  default:
    return SDValue();
  case ISD::VECTOR_SHUFFLE:
    return performVECTOR_SHUFFLECombine(N, DCI);
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
    return performVectorExtendCombine(N, DCI);
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
  case ISD::FP_ROUND:
  case ISD::CONCAT_VECTORS:
    return performVectorTruncZeroCombine(N, DCI);
  }
}