1 //=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements the WebAssemblyTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "WebAssemblyISelLowering.h"
15 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
16 #include "Utils/WebAssemblyTypeUtilities.h"
17 #include "Utils/WebAssemblyUtilities.h"
18 #include "WebAssemblyMachineFunctionInfo.h"
19 #include "WebAssemblySubtarget.h"
20 #include "WebAssemblyTargetMachine.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineJumpTableInfo.h"
24 #include "llvm/CodeGen/MachineModuleInfo.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/SelectionDAG.h"
27 #include "llvm/CodeGen/SelectionDAGNodes.h"
28 #include "llvm/IR/DiagnosticInfo.h"
29 #include "llvm/IR/DiagnosticPrinter.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/IntrinsicsWebAssembly.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/KnownBits.h"
36 #include "llvm/Support/MathExtras.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include "llvm/Target/TargetOptions.h"
41 #define DEBUG_TYPE "wasm-lower"
43 WebAssemblyTargetLowering::WebAssemblyTargetLowering(
44 const TargetMachine
&TM
, const WebAssemblySubtarget
&STI
)
45 : TargetLowering(TM
), Subtarget(&STI
) {
46 auto MVTPtr
= Subtarget
->hasAddr64() ? MVT::i64
: MVT::i32
;
48 // Booleans always contain 0 or 1.
49 setBooleanContents(ZeroOrOneBooleanContent
);
50 // Except in SIMD vectors
51 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent
);
52 // We don't know the microarchitecture here, so just reduce register pressure.
53 setSchedulingPreference(Sched::RegPressure
);
54 // Tell ISel that we have a stack pointer.
55 setStackPointerRegisterToSaveRestore(
56 Subtarget
->hasAddr64() ? WebAssembly::SP64
: WebAssembly::SP32
);
57 // Set up the register classes.
58 addRegisterClass(MVT::i32
, &WebAssembly::I32RegClass
);
59 addRegisterClass(MVT::i64
, &WebAssembly::I64RegClass
);
60 addRegisterClass(MVT::f32
, &WebAssembly::F32RegClass
);
61 addRegisterClass(MVT::f64
, &WebAssembly::F64RegClass
);
62 if (Subtarget
->hasSIMD128()) {
63 addRegisterClass(MVT::v16i8
, &WebAssembly::V128RegClass
);
64 addRegisterClass(MVT::v8i16
, &WebAssembly::V128RegClass
);
65 addRegisterClass(MVT::v4i32
, &WebAssembly::V128RegClass
);
66 addRegisterClass(MVT::v4f32
, &WebAssembly::V128RegClass
);
67 addRegisterClass(MVT::v2i64
, &WebAssembly::V128RegClass
);
68 addRegisterClass(MVT::v2f64
, &WebAssembly::V128RegClass
);
70 if (Subtarget
->hasReferenceTypes()) {
71 addRegisterClass(MVT::externref
, &WebAssembly::EXTERNREFRegClass
);
72 addRegisterClass(MVT::funcref
, &WebAssembly::FUNCREFRegClass
);
74 // Compute derived properties from the register classes.
75 computeRegisterProperties(Subtarget
->getRegisterInfo());
77 // Transform loads and stores to pointers in address space 1 to loads and
78 // stores to WebAssembly global variables, outside linear memory.
79 for (auto T
: {MVT::i32
, MVT::i64
, MVT::f32
, MVT::f64
}) {
80 setOperationAction(ISD::LOAD
, T
, Custom
);
81 setOperationAction(ISD::STORE
, T
, Custom
);
83 if (Subtarget
->hasSIMD128()) {
84 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
, MVT::v4f32
, MVT::v2i64
,
86 setOperationAction(ISD::LOAD
, T
, Custom
);
87 setOperationAction(ISD::STORE
, T
, Custom
);
90 if (Subtarget
->hasReferenceTypes()) {
91 // We need custom load and store lowering for both externref, funcref and
92 // Other. The MVT::Other here represents tables of reference types.
93 for (auto T
: {MVT::externref
, MVT::funcref
, MVT::Other
}) {
94 setOperationAction(ISD::LOAD
, T
, Custom
);
95 setOperationAction(ISD::STORE
, T
, Custom
);
99 setOperationAction(ISD::GlobalAddress
, MVTPtr
, Custom
);
100 setOperationAction(ISD::GlobalTLSAddress
, MVTPtr
, Custom
);
101 setOperationAction(ISD::ExternalSymbol
, MVTPtr
, Custom
);
102 setOperationAction(ISD::JumpTable
, MVTPtr
, Custom
);
103 setOperationAction(ISD::BlockAddress
, MVTPtr
, Custom
);
104 setOperationAction(ISD::BRIND
, MVT::Other
, Custom
);
106 // Take the default expansion for va_arg, va_copy, and va_end. There is no
107 // default action for va_start, so we do that custom.
108 setOperationAction(ISD::VASTART
, MVT::Other
, Custom
);
109 setOperationAction(ISD::VAARG
, MVT::Other
, Expand
);
110 setOperationAction(ISD::VACOPY
, MVT::Other
, Expand
);
111 setOperationAction(ISD::VAEND
, MVT::Other
, Expand
);
113 for (auto T
: {MVT::f32
, MVT::f64
, MVT::v4f32
, MVT::v2f64
}) {
114 // Don't expand the floating-point types to constant pools.
115 setOperationAction(ISD::ConstantFP
, T
, Legal
);
116 // Expand floating-point comparisons.
117 for (auto CC
: {ISD::SETO
, ISD::SETUO
, ISD::SETUEQ
, ISD::SETONE
,
118 ISD::SETULT
, ISD::SETULE
, ISD::SETUGT
, ISD::SETUGE
})
119 setCondCodeAction(CC
, T
, Expand
);
120 // Expand floating-point library function operators.
122 {ISD::FSIN
, ISD::FCOS
, ISD::FSINCOS
, ISD::FPOW
, ISD::FREM
, ISD::FMA
})
123 setOperationAction(Op
, T
, Expand
);
124 // Note supported floating-point library function operators that otherwise
125 // default to expand.
127 {ISD::FCEIL
, ISD::FFLOOR
, ISD::FTRUNC
, ISD::FNEARBYINT
, ISD::FRINT
})
128 setOperationAction(Op
, T
, Legal
);
129 // Support minimum and maximum, which otherwise default to expand.
130 setOperationAction(ISD::FMINIMUM
, T
, Legal
);
131 setOperationAction(ISD::FMAXIMUM
, T
, Legal
);
132 // WebAssembly currently has no builtin f16 support.
133 setOperationAction(ISD::FP16_TO_FP
, T
, Expand
);
134 setOperationAction(ISD::FP_TO_FP16
, T
, Expand
);
135 setLoadExtAction(ISD::EXTLOAD
, T
, MVT::f16
, Expand
);
136 setTruncStoreAction(T
, MVT::f16
, Expand
);
139 // Expand unavailable integer operations.
141 {ISD::BSWAP
, ISD::SMUL_LOHI
, ISD::UMUL_LOHI
, ISD::MULHS
, ISD::MULHU
,
142 ISD::SDIVREM
, ISD::UDIVREM
, ISD::SHL_PARTS
, ISD::SRA_PARTS
,
143 ISD::SRL_PARTS
, ISD::ADDC
, ISD::ADDE
, ISD::SUBC
, ISD::SUBE
}) {
144 for (auto T
: {MVT::i32
, MVT::i64
})
145 setOperationAction(Op
, T
, Expand
);
146 if (Subtarget
->hasSIMD128())
147 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
, MVT::v2i64
})
148 setOperationAction(Op
, T
, Expand
);
151 if (Subtarget
->hasNontrappingFPToInt())
152 for (auto Op
: {ISD::FP_TO_SINT_SAT
, ISD::FP_TO_UINT_SAT
})
153 for (auto T
: {MVT::i32
, MVT::i64
})
154 setOperationAction(Op
, T
, Custom
);
156 // SIMD-specific configuration
157 if (Subtarget
->hasSIMD128()) {
158 // Hoist bitcasts out of shuffles
159 setTargetDAGCombine(ISD::VECTOR_SHUFFLE
);
161 // Combine extends of extract_subvectors into widening ops
162 setTargetDAGCombine(ISD::SIGN_EXTEND
);
163 setTargetDAGCombine(ISD::ZERO_EXTEND
);
165 // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
167 setTargetDAGCombine(ISD::SINT_TO_FP
);
168 setTargetDAGCombine(ISD::UINT_TO_FP
);
169 setTargetDAGCombine(ISD::FP_EXTEND
);
170 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR
);
172 // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
173 // into conversion ops
174 setTargetDAGCombine(ISD::FP_TO_SINT_SAT
);
175 setTargetDAGCombine(ISD::FP_TO_UINT_SAT
);
176 setTargetDAGCombine(ISD::FP_ROUND
);
177 setTargetDAGCombine(ISD::CONCAT_VECTORS
);
179 setTargetDAGCombine(ISD::TRUNCATE
);
181 // Support saturating add for i8x16 and i16x8
182 for (auto Op
: {ISD::SADDSAT
, ISD::UADDSAT
})
183 for (auto T
: {MVT::v16i8
, MVT::v8i16
})
184 setOperationAction(Op
, T
, Legal
);
186 // Support integer abs
187 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
, MVT::v2i64
})
188 setOperationAction(ISD::ABS
, T
, Legal
);
190 // Custom lower BUILD_VECTORs to minimize number of replace_lanes
191 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
, MVT::v4f32
, MVT::v2i64
,
193 setOperationAction(ISD::BUILD_VECTOR
, T
, Custom
);
195 // We have custom shuffle lowering to expose the shuffle mask
196 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
, MVT::v4f32
, MVT::v2i64
,
198 setOperationAction(ISD::VECTOR_SHUFFLE
, T
, Custom
);
200 // Custom lowering since wasm shifts must have a scalar shift amount
201 for (auto Op
: {ISD::SHL
, ISD::SRA
, ISD::SRL
})
202 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
, MVT::v2i64
})
203 setOperationAction(Op
, T
, Custom
);
205 // Custom lower lane accesses to expand out variable indices
206 for (auto Op
: {ISD::EXTRACT_VECTOR_ELT
, ISD::INSERT_VECTOR_ELT
})
207 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
, MVT::v4f32
, MVT::v2i64
,
209 setOperationAction(Op
, T
, Custom
);
211 // There is no i8x16.mul instruction
212 setOperationAction(ISD::MUL
, MVT::v16i8
, Expand
);
214 // There is no vector conditional select instruction
215 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
, MVT::v4f32
, MVT::v2i64
,
217 setOperationAction(ISD::SELECT_CC
, T
, Expand
);
219 // Expand integer operations supported for scalars but not SIMD
221 {ISD::SDIV
, ISD::UDIV
, ISD::SREM
, ISD::UREM
, ISD::ROTL
, ISD::ROTR
})
222 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
, MVT::v2i64
})
223 setOperationAction(Op
, T
, Expand
);
225 // But we do have integer min and max operations
226 for (auto Op
: {ISD::SMIN
, ISD::SMAX
, ISD::UMIN
, ISD::UMAX
})
227 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
})
228 setOperationAction(Op
, T
, Legal
);
230 // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
231 setOperationAction(ISD::CTPOP
, MVT::v16i8
, Legal
);
232 setOperationAction(ISD::CTLZ
, MVT::v16i8
, Expand
);
233 setOperationAction(ISD::CTTZ
, MVT::v16i8
, Expand
);
235 // Custom lower bit counting operations for other types to scalarize them.
236 for (auto Op
: {ISD::CTLZ
, ISD::CTTZ
, ISD::CTPOP
})
237 for (auto T
: {MVT::v8i16
, MVT::v4i32
, MVT::v2i64
})
238 setOperationAction(Op
, T
, Custom
);
240 // Expand float operations supported for scalars but not SIMD
241 for (auto Op
: {ISD::FCOPYSIGN
, ISD::FLOG
, ISD::FLOG2
, ISD::FLOG10
,
242 ISD::FEXP
, ISD::FEXP2
, ISD::FRINT
})
243 for (auto T
: {MVT::v4f32
, MVT::v2f64
})
244 setOperationAction(Op
, T
, Expand
);
246 // Unsigned comparison operations are unavailable for i64x2 vectors.
247 for (auto CC
: {ISD::SETUGT
, ISD::SETUGE
, ISD::SETULT
, ISD::SETULE
})
248 setCondCodeAction(CC
, MVT::v2i64
, Custom
);
250 // 64x2 conversions are not in the spec
252 {ISD::SINT_TO_FP
, ISD::UINT_TO_FP
, ISD::FP_TO_SINT
, ISD::FP_TO_UINT
})
253 for (auto T
: {MVT::v2i64
, MVT::v2f64
})
254 setOperationAction(Op
, T
, Expand
);
256 // But saturating fp_to_int converstions are
257 for (auto Op
: {ISD::FP_TO_SINT_SAT
, ISD::FP_TO_UINT_SAT
})
258 setOperationAction(Op
, MVT::v4i32
, Custom
);
261 // As a special case, these operators use the type to mean the type to
263 setOperationAction(ISD::SIGN_EXTEND_INREG
, MVT::i1
, Expand
);
264 if (!Subtarget
->hasSignExt()) {
265 // Sign extends are legal only when extending a vector extract
266 auto Action
= Subtarget
->hasSIMD128() ? Custom
: Expand
;
267 for (auto T
: {MVT::i8
, MVT::i16
, MVT::i32
})
268 setOperationAction(ISD::SIGN_EXTEND_INREG
, T
, Action
);
270 for (auto T
: MVT::integer_fixedlen_vector_valuetypes())
271 setOperationAction(ISD::SIGN_EXTEND_INREG
, T
, Expand
);
273 // Dynamic stack allocation: use the default expansion.
274 setOperationAction(ISD::STACKSAVE
, MVT::Other
, Expand
);
275 setOperationAction(ISD::STACKRESTORE
, MVT::Other
, Expand
);
276 setOperationAction(ISD::DYNAMIC_STACKALLOC
, MVTPtr
, Expand
);
278 setOperationAction(ISD::FrameIndex
, MVT::i32
, Custom
);
279 setOperationAction(ISD::FrameIndex
, MVT::i64
, Custom
);
280 setOperationAction(ISD::CopyToReg
, MVT::Other
, Custom
);
282 // Expand these forms; we pattern-match the forms that we can handle in isel.
283 for (auto T
: {MVT::i32
, MVT::i64
, MVT::f32
, MVT::f64
})
284 for (auto Op
: {ISD::BR_CC
, ISD::SELECT_CC
})
285 setOperationAction(Op
, T
, Expand
);
287 // We have custom switch handling.
288 setOperationAction(ISD::BR_JT
, MVT::Other
, Custom
);
290 // WebAssembly doesn't have:
291 // - Floating-point extending loads.
292 // - Floating-point truncating stores.
293 // - i1 extending loads.
294 // - truncating SIMD stores and most extending loads
295 setLoadExtAction(ISD::EXTLOAD
, MVT::f64
, MVT::f32
, Expand
);
296 setTruncStoreAction(MVT::f64
, MVT::f32
, Expand
);
297 for (auto T
: MVT::integer_valuetypes())
298 for (auto Ext
: {ISD::EXTLOAD
, ISD::ZEXTLOAD
, ISD::SEXTLOAD
})
299 setLoadExtAction(Ext
, T
, MVT::i1
, Promote
);
300 if (Subtarget
->hasSIMD128()) {
301 for (auto T
: {MVT::v16i8
, MVT::v8i16
, MVT::v4i32
, MVT::v2i64
, MVT::v4f32
,
303 for (auto MemT
: MVT::fixedlen_vector_valuetypes()) {
304 if (MVT(T
) != MemT
) {
305 setTruncStoreAction(T
, MemT
, Expand
);
306 for (auto Ext
: {ISD::EXTLOAD
, ISD::ZEXTLOAD
, ISD::SEXTLOAD
})
307 setLoadExtAction(Ext
, T
, MemT
, Expand
);
311 // But some vector extending loads are legal
312 for (auto Ext
: {ISD::EXTLOAD
, ISD::SEXTLOAD
, ISD::ZEXTLOAD
}) {
313 setLoadExtAction(Ext
, MVT::v8i16
, MVT::v8i8
, Legal
);
314 setLoadExtAction(Ext
, MVT::v4i32
, MVT::v4i16
, Legal
);
315 setLoadExtAction(Ext
, MVT::v2i64
, MVT::v2i32
, Legal
);
317 setLoadExtAction(ISD::EXTLOAD
, MVT::v2f64
, MVT::v2f32
, Legal
);
320 // Don't do anything clever with build_pairs
321 setOperationAction(ISD::BUILD_PAIR
, MVT::i64
, Expand
);
323 // Trap lowers to wasm unreachable
324 setOperationAction(ISD::TRAP
, MVT::Other
, Legal
);
325 setOperationAction(ISD::DEBUGTRAP
, MVT::Other
, Legal
);
327 // Exception handling intrinsics
328 setOperationAction(ISD::INTRINSIC_WO_CHAIN
, MVT::Other
, Custom
);
329 setOperationAction(ISD::INTRINSIC_W_CHAIN
, MVT::Other
, Custom
);
330 setOperationAction(ISD::INTRINSIC_VOID
, MVT::Other
, Custom
);
332 setMaxAtomicSizeInBitsSupported(64);
334 // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
335 // consistent with the f64 and f128 names.
336 setLibcallName(RTLIB::FPEXT_F16_F32
, "__extendhfsf2");
337 setLibcallName(RTLIB::FPROUND_F32_F16
, "__truncsfhf2");
339 // Define the emscripten name for return address helper.
340 // TODO: when implementing other Wasm backends, make this generic or only do
341 // this on emscripten depending on what they end up doing.
342 setLibcallName(RTLIB::RETURN_ADDRESS
, "emscripten_return_address");
344 // Always convert switches to br_tables unless there is only one case, which
345 // is equivalent to a simple branch. This reduces code size for wasm, and we
346 // defer possible jump table optimizations to the VM.
347 setMinimumJumpTableEntries(2);
350 MVT
WebAssemblyTargetLowering::getPointerTy(const DataLayout
&DL
,
352 if (AS
== WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF
)
353 return MVT::externref
;
354 if (AS
== WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF
)
356 return TargetLowering::getPointerTy(DL
, AS
);
359 MVT
WebAssemblyTargetLowering::getPointerMemTy(const DataLayout
&DL
,
361 if (AS
== WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF
)
362 return MVT::externref
;
363 if (AS
== WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF
)
365 return TargetLowering::getPointerMemTy(DL
, AS
);
368 TargetLowering::AtomicExpansionKind
369 WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst
*AI
) const {
370 // We have wasm instructions for these
371 switch (AI
->getOperation()) {
372 case AtomicRMWInst::Add
:
373 case AtomicRMWInst::Sub
:
374 case AtomicRMWInst::And
:
375 case AtomicRMWInst::Or
:
376 case AtomicRMWInst::Xor
:
377 case AtomicRMWInst::Xchg
:
378 return AtomicExpansionKind::None
;
382 return AtomicExpansionKind::CmpXChg
;
385 bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp
) const {
386 // Implementation copied from X86TargetLowering.
387 unsigned Opc
= VecOp
.getOpcode();
389 // Assume target opcodes can't be scalarized.
390 // TODO - do we have any exceptions?
391 if (Opc
>= ISD::BUILTIN_OP_END
)
394 // If the vector op is not supported, try to convert to scalar.
395 EVT VecVT
= VecOp
.getValueType();
396 if (!isOperationLegalOrCustomOrPromote(Opc
, VecVT
))
399 // If the vector op is supported, but the scalar op is not, the transform may
400 // not be worthwhile.
401 EVT ScalarVT
= VecVT
.getScalarType();
402 return isOperationLegalOrCustomOrPromote(Opc
, ScalarVT
);
405 FastISel
*WebAssemblyTargetLowering::createFastISel(
406 FunctionLoweringInfo
&FuncInfo
, const TargetLibraryInfo
*LibInfo
) const {
407 return WebAssembly::createFastISel(FuncInfo
, LibInfo
);
410 MVT
WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout
& /*DL*/,
412 unsigned BitWidth
= NextPowerOf2(VT
.getSizeInBits() - 1);
413 if (BitWidth
> 1 && BitWidth
< 8)
417 // The shift will be lowered to a libcall, and compiler-rt libcalls expect
418 // the count to be an i32.
420 assert(BitWidth
>= Log2_32_Ceil(VT
.getSizeInBits()) &&
421 "32-bit shift counts ought to be enough for anyone");
424 MVT Result
= MVT::getIntegerVT(BitWidth
);
425 assert(Result
!= MVT::INVALID_SIMPLE_VALUE_TYPE
&&
426 "Unable to represent scalar shift amount type");
430 // Lower an fp-to-int conversion operator from the LLVM opcode, which has an
431 // undefined result on invalid/overflow, to the WebAssembly opcode, which
432 // traps on invalid/overflow.
433 static MachineBasicBlock
*LowerFPToInt(MachineInstr
&MI
, DebugLoc DL
,
434 MachineBasicBlock
*BB
,
435 const TargetInstrInfo
&TII
,
436 bool IsUnsigned
, bool Int64
,
437 bool Float64
, unsigned LoweredOpcode
) {
438 MachineRegisterInfo
&MRI
= BB
->getParent()->getRegInfo();
440 Register OutReg
= MI
.getOperand(0).getReg();
441 Register InReg
= MI
.getOperand(1).getReg();
443 unsigned Abs
= Float64
? WebAssembly::ABS_F64
: WebAssembly::ABS_F32
;
444 unsigned FConst
= Float64
? WebAssembly::CONST_F64
: WebAssembly::CONST_F32
;
445 unsigned LT
= Float64
? WebAssembly::LT_F64
: WebAssembly::LT_F32
;
446 unsigned GE
= Float64
? WebAssembly::GE_F64
: WebAssembly::GE_F32
;
447 unsigned IConst
= Int64
? WebAssembly::CONST_I64
: WebAssembly::CONST_I32
;
448 unsigned Eqz
= WebAssembly::EQZ_I32
;
449 unsigned And
= WebAssembly::AND_I32
;
450 int64_t Limit
= Int64
? INT64_MIN
: INT32_MIN
;
451 int64_t Substitute
= IsUnsigned
? 0 : Limit
;
452 double CmpVal
= IsUnsigned
? -(double)Limit
* 2.0 : -(double)Limit
;
453 auto &Context
= BB
->getParent()->getFunction().getContext();
454 Type
*Ty
= Float64
? Type::getDoubleTy(Context
) : Type::getFloatTy(Context
);
456 const BasicBlock
*LLVMBB
= BB
->getBasicBlock();
457 MachineFunction
*F
= BB
->getParent();
458 MachineBasicBlock
*TrueMBB
= F
->CreateMachineBasicBlock(LLVMBB
);
459 MachineBasicBlock
*FalseMBB
= F
->CreateMachineBasicBlock(LLVMBB
);
460 MachineBasicBlock
*DoneMBB
= F
->CreateMachineBasicBlock(LLVMBB
);
462 MachineFunction::iterator It
= ++BB
->getIterator();
463 F
->insert(It
, FalseMBB
);
464 F
->insert(It
, TrueMBB
);
465 F
->insert(It
, DoneMBB
);
467 // Transfer the remainder of BB and its successor edges to DoneMBB.
468 DoneMBB
->splice(DoneMBB
->begin(), BB
, std::next(MI
.getIterator()), BB
->end());
469 DoneMBB
->transferSuccessorsAndUpdatePHIs(BB
);
471 BB
->addSuccessor(TrueMBB
);
472 BB
->addSuccessor(FalseMBB
);
473 TrueMBB
->addSuccessor(DoneMBB
);
474 FalseMBB
->addSuccessor(DoneMBB
);
476 unsigned Tmp0
, Tmp1
, CmpReg
, EqzReg
, FalseReg
, TrueReg
;
477 Tmp0
= MRI
.createVirtualRegister(MRI
.getRegClass(InReg
));
478 Tmp1
= MRI
.createVirtualRegister(MRI
.getRegClass(InReg
));
479 CmpReg
= MRI
.createVirtualRegister(&WebAssembly::I32RegClass
);
480 EqzReg
= MRI
.createVirtualRegister(&WebAssembly::I32RegClass
);
481 FalseReg
= MRI
.createVirtualRegister(MRI
.getRegClass(OutReg
));
482 TrueReg
= MRI
.createVirtualRegister(MRI
.getRegClass(OutReg
));
484 MI
.eraseFromParent();
485 // For signed numbers, we can do a single comparison to determine whether
486 // fabs(x) is within range.
490 BuildMI(BB
, DL
, TII
.get(Abs
), Tmp0
).addReg(InReg
);
492 BuildMI(BB
, DL
, TII
.get(FConst
), Tmp1
)
493 .addFPImm(cast
<ConstantFP
>(ConstantFP::get(Ty
, CmpVal
)));
494 BuildMI(BB
, DL
, TII
.get(LT
), CmpReg
).addReg(Tmp0
).addReg(Tmp1
);
496 // For unsigned numbers, we have to do a separate comparison with zero.
498 Tmp1
= MRI
.createVirtualRegister(MRI
.getRegClass(InReg
));
499 Register SecondCmpReg
=
500 MRI
.createVirtualRegister(&WebAssembly::I32RegClass
);
501 Register AndReg
= MRI
.createVirtualRegister(&WebAssembly::I32RegClass
);
502 BuildMI(BB
, DL
, TII
.get(FConst
), Tmp1
)
503 .addFPImm(cast
<ConstantFP
>(ConstantFP::get(Ty
, 0.0)));
504 BuildMI(BB
, DL
, TII
.get(GE
), SecondCmpReg
).addReg(Tmp0
).addReg(Tmp1
);
505 BuildMI(BB
, DL
, TII
.get(And
), AndReg
).addReg(CmpReg
).addReg(SecondCmpReg
);
509 BuildMI(BB
, DL
, TII
.get(Eqz
), EqzReg
).addReg(CmpReg
);
511 // Create the CFG diamond to select between doing the conversion or using
512 // the substitute value.
513 BuildMI(BB
, DL
, TII
.get(WebAssembly::BR_IF
)).addMBB(TrueMBB
).addReg(EqzReg
);
514 BuildMI(FalseMBB
, DL
, TII
.get(LoweredOpcode
), FalseReg
).addReg(InReg
);
515 BuildMI(FalseMBB
, DL
, TII
.get(WebAssembly::BR
)).addMBB(DoneMBB
);
516 BuildMI(TrueMBB
, DL
, TII
.get(IConst
), TrueReg
).addImm(Substitute
);
517 BuildMI(*DoneMBB
, DoneMBB
->begin(), DL
, TII
.get(TargetOpcode::PHI
), OutReg
)
526 static MachineBasicBlock
*
527 LowerCallResults(MachineInstr
&CallResults
, DebugLoc DL
, MachineBasicBlock
*BB
,
528 const WebAssemblySubtarget
*Subtarget
,
529 const TargetInstrInfo
&TII
) {
530 MachineInstr
&CallParams
= *CallResults
.getPrevNode();
531 assert(CallParams
.getOpcode() == WebAssembly::CALL_PARAMS
);
532 assert(CallResults
.getOpcode() == WebAssembly::CALL_RESULTS
||
533 CallResults
.getOpcode() == WebAssembly::RET_CALL_RESULTS
);
535 bool IsIndirect
= CallParams
.getOperand(0).isReg();
536 bool IsRetCall
= CallResults
.getOpcode() == WebAssembly::RET_CALL_RESULTS
;
538 bool IsFuncrefCall
= false;
540 Register Reg
= CallParams
.getOperand(0).getReg();
541 const MachineFunction
*MF
= BB
->getParent();
542 const MachineRegisterInfo
&MRI
= MF
->getRegInfo();
543 const TargetRegisterClass
*TRC
= MRI
.getRegClass(Reg
);
544 IsFuncrefCall
= (TRC
== &WebAssembly::FUNCREFRegClass
);
545 assert(!IsFuncrefCall
|| Subtarget
->hasReferenceTypes());
549 if (IsIndirect
&& IsRetCall
) {
550 CallOp
= WebAssembly::RET_CALL_INDIRECT
;
551 } else if (IsIndirect
) {
552 CallOp
= WebAssembly::CALL_INDIRECT
;
553 } else if (IsRetCall
) {
554 CallOp
= WebAssembly::RET_CALL
;
556 CallOp
= WebAssembly::CALL
;
559 MachineFunction
&MF
= *BB
->getParent();
560 const MCInstrDesc
&MCID
= TII
.get(CallOp
);
561 MachineInstrBuilder
MIB(MF
, MF
.CreateMachineInstr(MCID
, DL
));
563 // See if we must truncate the function pointer.
564 // CALL_INDIRECT takes an i32, but in wasm64 we represent function pointers
565 // as 64-bit for uniformity with other pointer types.
566 // See also: WebAssemblyFastISel::selectCall
567 if (IsIndirect
&& MF
.getSubtarget
<WebAssemblySubtarget
>().hasAddr64()) {
569 MF
.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass
);
570 auto &FnPtr
= CallParams
.getOperand(0);
571 BuildMI(*BB
, CallResults
.getIterator(), DL
,
572 TII
.get(WebAssembly::I32_WRAP_I64
), Reg32
)
573 .addReg(FnPtr
.getReg());
577 // Move the function pointer to the end of the arguments for indirect calls
579 auto FnPtr
= CallParams
.getOperand(0);
580 CallParams
.RemoveOperand(0);
582 // For funcrefs, call_indirect is done through __funcref_call_table and the
583 // funcref is always installed in slot 0 of the table, therefore instead of having
584 // the function pointer added at the end of the params list, a zero (the index in
585 // __funcref_call_table is added).
588 MF
.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass
);
589 MachineInstrBuilder MIBC0
=
590 BuildMI(MF
, DL
, TII
.get(WebAssembly::CONST_I32
), RegZero
).addImm(0);
592 BB
->insert(CallResults
.getIterator(), MIBC0
);
593 MachineInstrBuilder(MF
, CallParams
).addReg(RegZero
);
595 CallParams
.addOperand(FnPtr
);
598 for (auto Def
: CallResults
.defs())
602 // Placeholder for the type index.
604 // The table into which this call_indirect indexes.
605 MCSymbolWasm
*Table
= IsFuncrefCall
606 ? WebAssembly::getOrCreateFuncrefCallTableSymbol(
607 MF
.getContext(), Subtarget
)
608 : WebAssembly::getOrCreateFunctionTableSymbol(
609 MF
.getContext(), Subtarget
);
610 if (Subtarget
->hasReferenceTypes()) {
613 // For the MVP there is at most one table whose number is 0, but we can't
614 // write a table symbol or issue relocations. Instead we just ensure the
615 // table is live and write a zero.
621 for (auto Use
: CallParams
.uses())
624 BB
->insert(CallResults
.getIterator(), MIB
);
625 CallParams
.eraseFromParent();
626 CallResults
.eraseFromParent();
628 // If this is a funcref call, to avoid hidden GC roots, we need to clear the
629 // table slot with ref.null upon call_indirect return.
631 // This generates the following code, which comes right after a call_indirect
636 // table.set __funcref_call_table
637 if (IsIndirect
&& IsFuncrefCall
) {
638 MCSymbolWasm
*Table
= WebAssembly::getOrCreateFuncrefCallTableSymbol(
639 MF
.getContext(), Subtarget
);
641 MF
.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass
);
642 MachineInstr
*Const0
=
643 BuildMI(MF
, DL
, TII
.get(WebAssembly::CONST_I32
), RegZero
).addImm(0);
644 BB
->insertAfter(MIB
.getInstr()->getIterator(), Const0
);
646 Register RegFuncref
=
647 MF
.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass
);
648 MachineInstr
*RefNull
=
649 BuildMI(MF
, DL
, TII
.get(WebAssembly::REF_NULL_FUNCREF
), RegFuncref
);
650 BB
->insertAfter(Const0
->getIterator(), RefNull
);
652 MachineInstr
*TableSet
=
653 BuildMI(MF
, DL
, TII
.get(WebAssembly::TABLE_SET_FUNCREF
))
657 BB
->insertAfter(RefNull
->getIterator(), TableSet
);
663 MachineBasicBlock
*WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
664 MachineInstr
&MI
, MachineBasicBlock
*BB
) const {
665 const TargetInstrInfo
&TII
= *Subtarget
->getInstrInfo();
666 DebugLoc DL
= MI
.getDebugLoc();
668 switch (MI
.getOpcode()) {
670 llvm_unreachable("Unexpected instr type to insert");
671 case WebAssembly::FP_TO_SINT_I32_F32
:
672 return LowerFPToInt(MI
, DL
, BB
, TII
, false, false, false,
673 WebAssembly::I32_TRUNC_S_F32
);
674 case WebAssembly::FP_TO_UINT_I32_F32
:
675 return LowerFPToInt(MI
, DL
, BB
, TII
, true, false, false,
676 WebAssembly::I32_TRUNC_U_F32
);
677 case WebAssembly::FP_TO_SINT_I64_F32
:
678 return LowerFPToInt(MI
, DL
, BB
, TII
, false, true, false,
679 WebAssembly::I64_TRUNC_S_F32
);
680 case WebAssembly::FP_TO_UINT_I64_F32
:
681 return LowerFPToInt(MI
, DL
, BB
, TII
, true, true, false,
682 WebAssembly::I64_TRUNC_U_F32
);
683 case WebAssembly::FP_TO_SINT_I32_F64
:
684 return LowerFPToInt(MI
, DL
, BB
, TII
, false, false, true,
685 WebAssembly::I32_TRUNC_S_F64
);
686 case WebAssembly::FP_TO_UINT_I32_F64
:
687 return LowerFPToInt(MI
, DL
, BB
, TII
, true, false, true,
688 WebAssembly::I32_TRUNC_U_F64
);
689 case WebAssembly::FP_TO_SINT_I64_F64
:
690 return LowerFPToInt(MI
, DL
, BB
, TII
, false, true, true,
691 WebAssembly::I64_TRUNC_S_F64
);
692 case WebAssembly::FP_TO_UINT_I64_F64
:
693 return LowerFPToInt(MI
, DL
, BB
, TII
, true, true, true,
694 WebAssembly::I64_TRUNC_U_F64
);
695 case WebAssembly::CALL_RESULTS
:
696 case WebAssembly::RET_CALL_RESULTS
:
697 return LowerCallResults(MI
, DL
, BB
, Subtarget
, TII
);
702 WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode
) const {
703 switch (static_cast<WebAssemblyISD::NodeType
>(Opcode
)) {
704 case WebAssemblyISD::FIRST_NUMBER
:
705 case WebAssemblyISD::FIRST_MEM_OPCODE
:
707 #define HANDLE_NODETYPE(NODE) \
708 case WebAssemblyISD::NODE: \
709 return "WebAssemblyISD::" #NODE;
710 #define HANDLE_MEM_NODETYPE(NODE) HANDLE_NODETYPE(NODE)
711 #include "WebAssemblyISD.def"
712 #undef HANDLE_MEM_NODETYPE
713 #undef HANDLE_NODETYPE
718 std::pair
<unsigned, const TargetRegisterClass
*>
719 WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
720 const TargetRegisterInfo
*TRI
, StringRef Constraint
, MVT VT
) const {
721 // First, see if this is a constraint that directly corresponds to a
722 // WebAssembly register class.
723 if (Constraint
.size() == 1) {
724 switch (Constraint
[0]) {
726 assert(VT
!= MVT::iPTR
&& "Pointer MVT not expected here");
727 if (Subtarget
->hasSIMD128() && VT
.isVector()) {
728 if (VT
.getSizeInBits() == 128)
729 return std::make_pair(0U, &WebAssembly::V128RegClass
);
731 if (VT
.isInteger() && !VT
.isVector()) {
732 if (VT
.getSizeInBits() <= 32)
733 return std::make_pair(0U, &WebAssembly::I32RegClass
);
734 if (VT
.getSizeInBits() <= 64)
735 return std::make_pair(0U, &WebAssembly::I64RegClass
);
737 if (VT
.isFloatingPoint() && !VT
.isVector()) {
738 switch (VT
.getSizeInBits()) {
740 return std::make_pair(0U, &WebAssembly::F32RegClass
);
742 return std::make_pair(0U, &WebAssembly::F64RegClass
);
753 return TargetLowering::getRegForInlineAsmConstraint(TRI
, Constraint
, VT
);
756 bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
757 // Assume ctz is a relatively cheap operation.
761 bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
762 // Assume clz is a relatively cheap operation.
766 bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout
&DL
,
768 Type
*Ty
, unsigned AS
,
769 Instruction
*I
) const {
770 // WebAssembly offsets are added as unsigned without wrapping. The
771 // isLegalAddressingMode gives us no way to determine if wrapping could be
772 // happening, so we approximate this by accepting only non-negative offsets.
776 // WebAssembly has no scale register operands.
780 // Everything else is legal.
784 bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
785 EVT
/*VT*/, unsigned /*AddrSpace*/, Align
/*Align*/,
786 MachineMemOperand::Flags
/*Flags*/, bool *Fast
) const {
787 // WebAssembly supports unaligned accesses, though it should be declared
788 // with the p2align attribute on loads and stores which do so, and there
789 // may be a performance impact. We tell LLVM they're "fast" because
790 // for the kinds of things that LLVM uses this for (merging adjacent stores
791 // of constants, etc.), WebAssembly implementations will either want the
792 // unaligned access or they'll split anyway.
798 bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT
,
799 AttributeList Attr
) const {
800 // The current thinking is that wasm engines will perform this optimization,
801 // so we can save on code size.
805 bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal
) const {
806 EVT ExtT
= ExtVal
.getValueType();
807 EVT MemT
= cast
<LoadSDNode
>(ExtVal
->getOperand(0))->getValueType(0);
808 return (ExtT
== MVT::v8i16
&& MemT
== MVT::v8i8
) ||
809 (ExtT
== MVT::v4i32
&& MemT
== MVT::v4i16
) ||
810 (ExtT
== MVT::v2i64
&& MemT
== MVT::v2i32
);
813 bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
814 const GlobalAddressSDNode
*GA
) const {
815 // Wasm doesn't support function addresses with offsets
816 const GlobalValue
*GV
= GA
->getGlobal();
817 return isa
<Function
>(GV
) ? false : TargetLowering::isOffsetFoldingLegal(GA
);
820 EVT
WebAssemblyTargetLowering::getSetCCResultType(const DataLayout
&DL
,
824 return VT
.changeVectorElementTypeToInteger();
826 // So far, all branch instructions in Wasm take an I32 condition.
827 // The default TargetLowering::getSetCCResultType returns the pointer size,
828 // which would be useful to reduce instruction counts when testing
829 // against 64-bit pointers/values if at some point Wasm supports that.
830 return EVT::getIntegerVT(C
, 32);
833 bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo
&Info
,
836 unsigned Intrinsic
) const {
838 case Intrinsic::wasm_memory_atomic_notify
:
839 Info
.opc
= ISD::INTRINSIC_W_CHAIN
;
840 Info
.memVT
= MVT::i32
;
841 Info
.ptrVal
= I
.getArgOperand(0);
843 Info
.align
= Align(4);
844 // atomic.notify instruction does not really load the memory specified with
845 // this argument, but MachineMemOperand should either be load or store, so
846 // we set this to a load.
847 // FIXME Volatile isn't really correct, but currently all LLVM atomic
848 // instructions are treated as volatiles in the backend, so we should be
849 // consistent. The same applies for wasm_atomic_wait intrinsics too.
850 Info
.flags
= MachineMemOperand::MOVolatile
| MachineMemOperand::MOLoad
;
852 case Intrinsic::wasm_memory_atomic_wait32
:
853 Info
.opc
= ISD::INTRINSIC_W_CHAIN
;
854 Info
.memVT
= MVT::i32
;
855 Info
.ptrVal
= I
.getArgOperand(0);
857 Info
.align
= Align(4);
858 Info
.flags
= MachineMemOperand::MOVolatile
| MachineMemOperand::MOLoad
;
860 case Intrinsic::wasm_memory_atomic_wait64
:
861 Info
.opc
= ISD::INTRINSIC_W_CHAIN
;
862 Info
.memVT
= MVT::i64
;
863 Info
.ptrVal
= I
.getArgOperand(0);
865 Info
.align
= Align(8);
866 Info
.flags
= MachineMemOperand::MOVolatile
| MachineMemOperand::MOLoad
;
873 void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
874 const SDValue Op
, KnownBits
&Known
, const APInt
&DemandedElts
,
875 const SelectionDAG
&DAG
, unsigned Depth
) const {
876 switch (Op
.getOpcode()) {
879 case ISD::INTRINSIC_WO_CHAIN
: {
880 unsigned IntNo
= Op
.getConstantOperandVal(0);
884 case Intrinsic::wasm_bitmask
: {
885 unsigned BitWidth
= Known
.getBitWidth();
886 EVT VT
= Op
.getOperand(1).getSimpleValueType();
887 unsigned PossibleBits
= VT
.getVectorNumElements();
888 APInt ZeroMask
= APInt::getHighBitsSet(BitWidth
, BitWidth
- PossibleBits
);
889 Known
.Zero
|= ZeroMask
;
897 TargetLoweringBase::LegalizeTypeAction
898 WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT
) const {
899 if (VT
.isFixedLengthVector()) {
900 MVT EltVT
= VT
.getVectorElementType();
901 // We have legal vector types with these lane types, so widening the
902 // vector would let us use some of the lanes directly without having to
903 // extend or truncate values.
904 if (EltVT
== MVT::i8
|| EltVT
== MVT::i16
|| EltVT
== MVT::i32
||
905 EltVT
== MVT::i64
|| EltVT
== MVT::f32
|| EltVT
== MVT::f64
)
906 return TypeWidenVector
;
909 return TargetLoweringBase::getPreferredVectorAction(VT
);
912 //===----------------------------------------------------------------------===//
913 // WebAssembly Lowering private implementation.
914 //===----------------------------------------------------------------------===//
916 //===----------------------------------------------------------------------===//
918 //===----------------------------------------------------------------------===//
920 static void fail(const SDLoc
&DL
, SelectionDAG
&DAG
, const char *Msg
) {
921 MachineFunction
&MF
= DAG
.getMachineFunction();
922 DAG
.getContext()->diagnose(
923 DiagnosticInfoUnsupported(MF
.getFunction(), Msg
, DL
.getDebugLoc()));
926 // Test whether the given calling convention is supported.
927 static bool callingConvSupported(CallingConv::ID CallConv
) {
928 // We currently support the language-independent target-independent
929 // conventions. We don't yet have a way to annotate calls with properties like
930 // "cold", and we don't have any call-clobbered registers, so these are mostly
931 // all handled the same.
932 return CallConv
== CallingConv::C
|| CallConv
== CallingConv::Fast
||
933 CallConv
== CallingConv::Cold
||
934 CallConv
== CallingConv::PreserveMost
||
935 CallConv
== CallingConv::PreserveAll
||
936 CallConv
== CallingConv::CXX_FAST_TLS
||
937 CallConv
== CallingConv::WASM_EmscriptenInvoke
||
938 CallConv
== CallingConv::Swift
;
942 WebAssemblyTargetLowering::LowerCall(CallLoweringInfo
&CLI
,
943 SmallVectorImpl
<SDValue
> &InVals
) const {
944 SelectionDAG
&DAG
= CLI
.DAG
;
946 SDValue Chain
= CLI
.Chain
;
947 SDValue Callee
= CLI
.Callee
;
948 MachineFunction
&MF
= DAG
.getMachineFunction();
949 auto Layout
= MF
.getDataLayout();
951 CallingConv::ID CallConv
= CLI
.CallConv
;
952 if (!callingConvSupported(CallConv
))
954 "WebAssembly doesn't support language-specific or target-specific "
955 "calling conventions yet");
956 if (CLI
.IsPatchPoint
)
957 fail(DL
, DAG
, "WebAssembly doesn't support patch point yet");
959 if (CLI
.IsTailCall
) {
960 auto NoTail
= [&](const char *Msg
) {
961 if (CLI
.CB
&& CLI
.CB
->isMustTailCall())
963 CLI
.IsTailCall
= false;
966 if (!Subtarget
->hasTailCall())
967 NoTail("WebAssembly 'tail-call' feature not enabled");
969 // Varargs calls cannot be tail calls because the buffer is on the stack
971 NoTail("WebAssembly does not support varargs tail calls");
973 // Do not tail call unless caller and callee return types match
974 const Function
&F
= MF
.getFunction();
975 const TargetMachine
&TM
= getTargetMachine();
976 Type
*RetTy
= F
.getReturnType();
977 SmallVector
<MVT
, 4> CallerRetTys
;
978 SmallVector
<MVT
, 4> CalleeRetTys
;
979 computeLegalValueVTs(F
, TM
, RetTy
, CallerRetTys
);
980 computeLegalValueVTs(F
, TM
, CLI
.RetTy
, CalleeRetTys
);
981 bool TypesMatch
= CallerRetTys
.size() == CalleeRetTys
.size() &&
982 std::equal(CallerRetTys
.begin(), CallerRetTys
.end(),
983 CalleeRetTys
.begin());
985 NoTail("WebAssembly tail call requires caller and callee return types to "
988 // If pointers to local stack values are passed, we cannot tail call
990 for (auto &Arg
: CLI
.CB
->args()) {
991 Value
*Val
= Arg
.get();
992 // Trace the value back through pointer operations
994 Value
*Src
= Val
->stripPointerCastsAndAliases();
995 if (auto *GEP
= dyn_cast
<GetElementPtrInst
>(Src
))
996 Src
= GEP
->getPointerOperand();
1001 if (isa
<AllocaInst
>(Val
)) {
1003 "WebAssembly does not support tail calling with stack arguments");
1010 SmallVectorImpl
<ISD::InputArg
> &Ins
= CLI
.Ins
;
1011 SmallVectorImpl
<ISD::OutputArg
> &Outs
= CLI
.Outs
;
1012 SmallVectorImpl
<SDValue
> &OutVals
= CLI
.OutVals
;
1014 // The generic code may have added an sret argument. If we're lowering an
1015 // invoke function, the ABI requires that the function pointer be the first
1016 // argument, so we may have to swap the arguments.
1017 if (CallConv
== CallingConv::WASM_EmscriptenInvoke
&& Outs
.size() >= 2 &&
1018 Outs
[0].Flags
.isSRet()) {
1019 std::swap(Outs
[0], Outs
[1]);
1020 std::swap(OutVals
[0], OutVals
[1]);
1023 bool HasSwiftSelfArg
= false;
1024 bool HasSwiftErrorArg
= false;
1025 unsigned NumFixedArgs
= 0;
1026 for (unsigned I
= 0; I
< Outs
.size(); ++I
) {
1027 const ISD::OutputArg
&Out
= Outs
[I
];
1028 SDValue
&OutVal
= OutVals
[I
];
1029 HasSwiftSelfArg
|= Out
.Flags
.isSwiftSelf();
1030 HasSwiftErrorArg
|= Out
.Flags
.isSwiftError();
1031 if (Out
.Flags
.isNest())
1032 fail(DL
, DAG
, "WebAssembly hasn't implemented nest arguments");
1033 if (Out
.Flags
.isInAlloca())
1034 fail(DL
, DAG
, "WebAssembly hasn't implemented inalloca arguments");
1035 if (Out
.Flags
.isInConsecutiveRegs())
1036 fail(DL
, DAG
, "WebAssembly hasn't implemented cons regs arguments");
1037 if (Out
.Flags
.isInConsecutiveRegsLast())
1038 fail(DL
, DAG
, "WebAssembly hasn't implemented cons regs last arguments");
1039 if (Out
.Flags
.isByVal() && Out
.Flags
.getByValSize() != 0) {
1040 auto &MFI
= MF
.getFrameInfo();
1041 int FI
= MFI
.CreateStackObject(Out
.Flags
.getByValSize(),
1042 Out
.Flags
.getNonZeroByValAlign(),
1045 DAG
.getConstant(Out
.Flags
.getByValSize(), DL
, MVT::i32
);
1046 SDValue FINode
= DAG
.getFrameIndex(FI
, getPointerTy(Layout
));
1047 Chain
= DAG
.getMemcpy(
1048 Chain
, DL
, FINode
, OutVal
, SizeNode
, Out
.Flags
.getNonZeroByValAlign(),
1049 /*isVolatile*/ false, /*AlwaysInline=*/false,
1050 /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
1053 // Count the number of fixed args *after* legalization.
1054 NumFixedArgs
+= Out
.IsFixed
;
1057 bool IsVarArg
= CLI
.IsVarArg
;
1058 auto PtrVT
= getPointerTy(Layout
);
1060 // For swiftcc, emit additional swiftself and swifterror arguments
1061 // if there aren't. These additional arguments are also added for callee
1062 // signature They are necessary to match callee and caller signature for
1064 if (CallConv
== CallingConv::Swift
) {
1065 if (!HasSwiftSelfArg
) {
1068 Arg
.Flags
.setSwiftSelf();
1069 CLI
.Outs
.push_back(Arg
);
1070 SDValue ArgVal
= DAG
.getUNDEF(PtrVT
);
1071 CLI
.OutVals
.push_back(ArgVal
);
1073 if (!HasSwiftErrorArg
) {
1076 Arg
.Flags
.setSwiftError();
1077 CLI
.Outs
.push_back(Arg
);
1078 SDValue ArgVal
= DAG
.getUNDEF(PtrVT
);
1079 CLI
.OutVals
.push_back(ArgVal
);
1083 // Analyze operands of the call, assigning locations to each operand.
1084 SmallVector
<CCValAssign
, 16> ArgLocs
;
1085 CCState
CCInfo(CallConv
, IsVarArg
, MF
, ArgLocs
, *DAG
.getContext());
1088 // Outgoing non-fixed arguments are placed in a buffer. First
1089 // compute their offsets and the total amount of buffer space needed.
1090 for (unsigned I
= NumFixedArgs
; I
< Outs
.size(); ++I
) {
1091 const ISD::OutputArg
&Out
= Outs
[I
];
1092 SDValue
&Arg
= OutVals
[I
];
1093 EVT VT
= Arg
.getValueType();
1094 assert(VT
!= MVT::iPTR
&& "Legalized args should be concrete");
1095 Type
*Ty
= VT
.getTypeForEVT(*DAG
.getContext());
1097 std::max(Out
.Flags
.getNonZeroOrigAlign(), Layout
.getABITypeAlign(Ty
));
1099 CCInfo
.AllocateStack(Layout
.getTypeAllocSize(Ty
), Alignment
);
1100 CCInfo
.addLoc(CCValAssign::getMem(ArgLocs
.size(), VT
.getSimpleVT(),
1101 Offset
, VT
.getSimpleVT(),
1102 CCValAssign::Full
));
1106 unsigned NumBytes
= CCInfo
.getAlignedCallFrameSize();
1109 if (IsVarArg
&& NumBytes
) {
1110 // For non-fixed arguments, next emit stores to store the argument values
1111 // to the stack buffer at the offsets computed above.
1112 int FI
= MF
.getFrameInfo().CreateStackObject(NumBytes
,
1113 Layout
.getStackAlignment(),
1116 SmallVector
<SDValue
, 8> Chains
;
1117 for (SDValue Arg
: drop_begin(OutVals
, NumFixedArgs
)) {
1118 assert(ArgLocs
[ValNo
].getValNo() == ValNo
&&
1119 "ArgLocs should remain in order and only hold varargs args");
1120 unsigned Offset
= ArgLocs
[ValNo
++].getLocMemOffset();
1121 FINode
= DAG
.getFrameIndex(FI
, getPointerTy(Layout
));
1122 SDValue Add
= DAG
.getNode(ISD::ADD
, DL
, PtrVT
, FINode
,
1123 DAG
.getConstant(Offset
, DL
, PtrVT
));
1125 DAG
.getStore(Chain
, DL
, Arg
, Add
,
1126 MachinePointerInfo::getFixedStack(MF
, FI
, Offset
)));
1128 if (!Chains
.empty())
1129 Chain
= DAG
.getNode(ISD::TokenFactor
, DL
, MVT::Other
, Chains
);
1130 } else if (IsVarArg
) {
1131 FINode
= DAG
.getIntPtrConstant(0, DL
);
1134 if (Callee
->getOpcode() == ISD::GlobalAddress
) {
1135 // If the callee is a GlobalAddress node (quite common, every direct call
1136 // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
1137 // doesn't at MO_GOT which is not needed for direct calls.
1138 GlobalAddressSDNode
* GA
= cast
<GlobalAddressSDNode
>(Callee
);
1139 Callee
= DAG
.getTargetGlobalAddress(GA
->getGlobal(), DL
,
1140 getPointerTy(DAG
.getDataLayout()),
1142 Callee
= DAG
.getNode(WebAssemblyISD::Wrapper
, DL
,
1143 getPointerTy(DAG
.getDataLayout()), Callee
);
1146 // Compute the operands for the CALLn node.
1147 SmallVector
<SDValue
, 16> Ops
;
1148 Ops
.push_back(Chain
);
1149 Ops
.push_back(Callee
);
1151 // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1153 Ops
.append(OutVals
.begin(),
1154 IsVarArg
? OutVals
.begin() + NumFixedArgs
: OutVals
.end());
1155 // Add a pointer to the vararg buffer.
1157 Ops
.push_back(FINode
);
1159 SmallVector
<EVT
, 8> InTys
;
1160 for (const auto &In
: Ins
) {
1161 assert(!In
.Flags
.isByVal() && "byval is not valid for return values");
1162 assert(!In
.Flags
.isNest() && "nest is not valid for return values");
1163 if (In
.Flags
.isInAlloca())
1164 fail(DL
, DAG
, "WebAssembly hasn't implemented inalloca return values");
1165 if (In
.Flags
.isInConsecutiveRegs())
1166 fail(DL
, DAG
, "WebAssembly hasn't implemented cons regs return values");
1167 if (In
.Flags
.isInConsecutiveRegsLast())
1169 "WebAssembly hasn't implemented cons regs last return values");
1170 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1172 InTys
.push_back(In
.VT
);
1175 // Lastly, if this is a call to a funcref we need to add an instruction
1176 // table.set to the chain and transform the call.
1178 WebAssembly::isFuncrefType(CLI
.CB
->getCalledOperand()->getType())) {
1179 // In the absence of function references proposal where a funcref call is
1180 // lowered to call_ref, using reference types we generate a table.set to set
1181 // the funcref to a special table used solely for this purpose, followed by
1182 // a call_indirect. Here we just generate the table set, and return the
1183 // SDValue of the table.set so that LowerCall can finalize the lowering by
1184 // generating the call_indirect.
1185 SDValue Chain
= Ops
[0];
1187 MCSymbolWasm
*Table
= WebAssembly::getOrCreateFuncrefCallTableSymbol(
1188 MF
.getContext(), Subtarget
);
1189 SDValue Sym
= DAG
.getMCSymbol(Table
, PtrVT
);
1190 SDValue TableSlot
= DAG
.getConstant(0, DL
, MVT::i32
);
1191 SDValue TableSetOps
[] = {Chain
, Sym
, TableSlot
, Callee
};
1192 SDValue TableSet
= DAG
.getMemIntrinsicNode(
1193 WebAssemblyISD::TABLE_SET
, DL
, DAG
.getVTList(MVT::Other
), TableSetOps
,
1195 // Machine Mem Operand args
1197 WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF
),
1198 CLI
.CB
->getCalledOperand()->getPointerAlignment(DAG
.getDataLayout()),
1199 MachineMemOperand::MOStore
);
1201 Ops
[0] = TableSet
; // The new chain is the TableSet itself
1204 if (CLI
.IsTailCall
) {
1205 // ret_calls do not return values to the current frame
1206 SDVTList NodeTys
= DAG
.getVTList(MVT::Other
, MVT::Glue
);
1207 return DAG
.getNode(WebAssemblyISD::RET_CALL
, DL
, NodeTys
, Ops
);
1210 InTys
.push_back(MVT::Other
);
1211 SDVTList InTyList
= DAG
.getVTList(InTys
);
1212 SDValue Res
= DAG
.getNode(WebAssemblyISD::CALL
, DL
, InTyList
, Ops
);
1214 for (size_t I
= 0; I
< Ins
.size(); ++I
)
1215 InVals
.push_back(Res
.getValue(I
));
1218 return Res
.getValue(Ins
.size());
1221 bool WebAssemblyTargetLowering::CanLowerReturn(
1222 CallingConv::ID
/*CallConv*/, MachineFunction
& /*MF*/, bool /*IsVarArg*/,
1223 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
1224 LLVMContext
& /*Context*/) const {
1225 // WebAssembly can only handle returning tuples with multivalue enabled
1226 return Subtarget
->hasMultivalue() || Outs
.size() <= 1;
1229 SDValue
WebAssemblyTargetLowering::LowerReturn(
1230 SDValue Chain
, CallingConv::ID CallConv
, bool /*IsVarArg*/,
1231 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
1232 const SmallVectorImpl
<SDValue
> &OutVals
, const SDLoc
&DL
,
1233 SelectionDAG
&DAG
) const {
1234 assert((Subtarget
->hasMultivalue() || Outs
.size() <= 1) &&
1235 "MVP WebAssembly can only return up to one value");
1236 if (!callingConvSupported(CallConv
))
1237 fail(DL
, DAG
, "WebAssembly doesn't support non-C calling conventions");
1239 SmallVector
<SDValue
, 4> RetOps(1, Chain
);
1240 RetOps
.append(OutVals
.begin(), OutVals
.end());
1241 Chain
= DAG
.getNode(WebAssemblyISD::RETURN
, DL
, MVT::Other
, RetOps
);
1243 // Record the number and types of the return values.
1244 for (const ISD::OutputArg
&Out
: Outs
) {
1245 assert(!Out
.Flags
.isByVal() && "byval is not valid for return values");
1246 assert(!Out
.Flags
.isNest() && "nest is not valid for return values");
1247 assert(Out
.IsFixed
&& "non-fixed return value is not valid");
1248 if (Out
.Flags
.isInAlloca())
1249 fail(DL
, DAG
, "WebAssembly hasn't implemented inalloca results");
1250 if (Out
.Flags
.isInConsecutiveRegs())
1251 fail(DL
, DAG
, "WebAssembly hasn't implemented cons regs results");
1252 if (Out
.Flags
.isInConsecutiveRegsLast())
1253 fail(DL
, DAG
, "WebAssembly hasn't implemented cons regs last results");
1259 SDValue
WebAssemblyTargetLowering::LowerFormalArguments(
1260 SDValue Chain
, CallingConv::ID CallConv
, bool IsVarArg
,
1261 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&DL
,
1262 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
) const {
1263 if (!callingConvSupported(CallConv
))
1264 fail(DL
, DAG
, "WebAssembly doesn't support non-C calling conventions");
1266 MachineFunction
&MF
= DAG
.getMachineFunction();
1267 auto *MFI
= MF
.getInfo
<WebAssemblyFunctionInfo
>();
1269 // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1270 // of the incoming values before they're represented by virtual registers.
1271 MF
.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS
);
1273 bool HasSwiftErrorArg
= false;
1274 bool HasSwiftSelfArg
= false;
1275 for (const ISD::InputArg
&In
: Ins
) {
1276 HasSwiftSelfArg
|= In
.Flags
.isSwiftSelf();
1277 HasSwiftErrorArg
|= In
.Flags
.isSwiftError();
1278 if (In
.Flags
.isInAlloca())
1279 fail(DL
, DAG
, "WebAssembly hasn't implemented inalloca arguments");
1280 if (In
.Flags
.isNest())
1281 fail(DL
, DAG
, "WebAssembly hasn't implemented nest arguments");
1282 if (In
.Flags
.isInConsecutiveRegs())
1283 fail(DL
, DAG
, "WebAssembly hasn't implemented cons regs arguments");
1284 if (In
.Flags
.isInConsecutiveRegsLast())
1285 fail(DL
, DAG
, "WebAssembly hasn't implemented cons regs last arguments");
1286 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1288 InVals
.push_back(In
.Used
? DAG
.getNode(WebAssemblyISD::ARGUMENT
, DL
, In
.VT
,
1289 DAG
.getTargetConstant(InVals
.size(),
1291 : DAG
.getUNDEF(In
.VT
));
1293 // Record the number and types of arguments.
1294 MFI
->addParam(In
.VT
);
1297 // For swiftcc, emit additional swiftself and swifterror arguments
1298 // if there aren't. These additional arguments are also added for callee
1299 // signature They are necessary to match callee and caller signature for
1301 auto PtrVT
= getPointerTy(MF
.getDataLayout());
1302 if (CallConv
== CallingConv::Swift
) {
1303 if (!HasSwiftSelfArg
) {
1304 MFI
->addParam(PtrVT
);
1306 if (!HasSwiftErrorArg
) {
1307 MFI
->addParam(PtrVT
);
1310 // Varargs are copied into a buffer allocated by the caller, and a pointer to
1311 // the buffer is passed as an argument.
1313 MVT PtrVT
= getPointerTy(MF
.getDataLayout());
1314 Register VarargVreg
=
1315 MF
.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT
));
1316 MFI
->setVarargBufferVreg(VarargVreg
);
1317 Chain
= DAG
.getCopyToReg(
1318 Chain
, DL
, VarargVreg
,
1319 DAG
.getNode(WebAssemblyISD::ARGUMENT
, DL
, PtrVT
,
1320 DAG
.getTargetConstant(Ins
.size(), DL
, MVT::i32
)));
1321 MFI
->addParam(PtrVT
);
1324 // Record the number and types of arguments and results.
1325 SmallVector
<MVT
, 4> Params
;
1326 SmallVector
<MVT
, 4> Results
;
1327 computeSignatureVTs(MF
.getFunction().getFunctionType(), &MF
.getFunction(),
1328 MF
.getFunction(), DAG
.getTarget(), Params
, Results
);
1329 for (MVT VT
: Results
)
1331 // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
1332 // the param logic here with ComputeSignatureVTs
1333 assert(MFI
->getParams().size() == Params
.size() &&
1334 std::equal(MFI
->getParams().begin(), MFI
->getParams().end(),
1340 void WebAssemblyTargetLowering::ReplaceNodeResults(
1341 SDNode
*N
, SmallVectorImpl
<SDValue
> &Results
, SelectionDAG
&DAG
) const {
1342 switch (N
->getOpcode()) {
1343 case ISD::SIGN_EXTEND_INREG
:
1344 // Do not add any results, signifying that N should not be custom lowered
1345 // after all. This happens because simd128 turns on custom lowering for
1346 // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1351 "ReplaceNodeResults not implemented for this op for WebAssembly!");
1355 //===----------------------------------------------------------------------===//
1356 // Custom lowering hooks.
1357 //===----------------------------------------------------------------------===//
1359 SDValue
WebAssemblyTargetLowering::LowerOperation(SDValue Op
,
1360 SelectionDAG
&DAG
) const {
1362 switch (Op
.getOpcode()) {
1364 llvm_unreachable("unimplemented operation lowering");
1366 case ISD::FrameIndex
:
1367 return LowerFrameIndex(Op
, DAG
);
1368 case ISD::GlobalAddress
:
1369 return LowerGlobalAddress(Op
, DAG
);
1370 case ISD::GlobalTLSAddress
:
1371 return LowerGlobalTLSAddress(Op
, DAG
);
1372 case ISD::ExternalSymbol
:
1373 return LowerExternalSymbol(Op
, DAG
);
1374 case ISD::JumpTable
:
1375 return LowerJumpTable(Op
, DAG
);
1377 return LowerBR_JT(Op
, DAG
);
1379 return LowerVASTART(Op
, DAG
);
1380 case ISD::BlockAddress
:
1382 fail(DL
, DAG
, "WebAssembly hasn't implemented computed gotos");
1384 case ISD::RETURNADDR
:
1385 return LowerRETURNADDR(Op
, DAG
);
1386 case ISD::FRAMEADDR
:
1387 return LowerFRAMEADDR(Op
, DAG
);
1388 case ISD::CopyToReg
:
1389 return LowerCopyToReg(Op
, DAG
);
1390 case ISD::EXTRACT_VECTOR_ELT
:
1391 case ISD::INSERT_VECTOR_ELT
:
1392 return LowerAccessVectorElement(Op
, DAG
);
1393 case ISD::INTRINSIC_VOID
:
1394 case ISD::INTRINSIC_WO_CHAIN
:
1395 case ISD::INTRINSIC_W_CHAIN
:
1396 return LowerIntrinsic(Op
, DAG
);
1397 case ISD::SIGN_EXTEND_INREG
:
1398 return LowerSIGN_EXTEND_INREG(Op
, DAG
);
1399 case ISD::BUILD_VECTOR
:
1400 return LowerBUILD_VECTOR(Op
, DAG
);
1401 case ISD::VECTOR_SHUFFLE
:
1402 return LowerVECTOR_SHUFFLE(Op
, DAG
);
1404 return LowerSETCC(Op
, DAG
);
1408 return LowerShift(Op
, DAG
);
1409 case ISD::FP_TO_SINT_SAT
:
1410 case ISD::FP_TO_UINT_SAT
:
1411 return LowerFP_TO_INT_SAT(Op
, DAG
);
1413 return LowerLoad(Op
, DAG
);
1415 return LowerStore(Op
, DAG
);
1419 return DAG
.UnrollVectorOp(Op
.getNode());
1423 static bool IsWebAssemblyGlobal(SDValue Op
) {
1424 if (const GlobalAddressSDNode
*GA
= dyn_cast
<GlobalAddressSDNode
>(Op
))
1425 return WebAssembly::isWasmVarAddressSpace(GA
->getAddressSpace());
1430 static Optional
<unsigned> IsWebAssemblyLocal(SDValue Op
, SelectionDAG
&DAG
) {
1431 const FrameIndexSDNode
*FI
= dyn_cast
<FrameIndexSDNode
>(Op
);
1435 auto &MF
= DAG
.getMachineFunction();
1436 return WebAssemblyFrameLowering::getLocalForStackObject(MF
, FI
->getIndex());
1439 static bool IsWebAssemblyTable(SDValue Op
) {
1440 const GlobalAddressSDNode
*GA
= dyn_cast
<GlobalAddressSDNode
>(Op
);
1441 if (GA
&& WebAssembly::isWasmVarAddressSpace(GA
->getAddressSpace())) {
1442 const GlobalValue
*Value
= GA
->getGlobal();
1443 const Type
*Ty
= Value
->getValueType();
1445 if (Ty
->isArrayTy() && WebAssembly::isRefType(Ty
->getArrayElementType()))
1451 // This function will accept as Op any access to a table, so Op can
1452 // be the actual table or an offset into the table.
1453 static bool IsWebAssemblyTableWithOffset(SDValue Op
) {
1454 if (Op
->getOpcode() == ISD::ADD
&& Op
->getNumOperands() == 2)
1455 return (Op
->getOperand(1).getSimpleValueType() == MVT::i32
&&
1456 IsWebAssemblyTableWithOffset(Op
->getOperand(0))) ||
1457 (Op
->getOperand(0).getSimpleValueType() == MVT::i32
&&
1458 IsWebAssemblyTableWithOffset(Op
->getOperand(1)));
1460 return IsWebAssemblyTable(Op
);
1463 // Helper for table pattern matching used in LowerStore and LowerLoad
1464 bool WebAssemblyTargetLowering::MatchTableForLowering(SelectionDAG
&DAG
,
1466 const SDValue
&Base
,
1467 GlobalAddressSDNode
*&GA
,
1468 SDValue
&Idx
) const {
1469 // We expect the following graph for a load of the form:
1470 // table[<var> + <constant offset>]
1473 // externref = load t1
1474 // t1: i32 = add t2, i32:<constant offset>
1475 // t2: i32 = add tX, table
1477 // This is in some cases simplified to just:
1479 // externref = load t1
1480 // t1: i32 = add t2, i32:tX
1482 // So, unfortunately we need to check for both cases and if we are in the
1483 // first case extract the table GlobalAddressNode and build a new node tY
1484 // that's tY: i32 = add i32:<constant offset>, i32:tX
1486 if (IsWebAssemblyTable(Base
)) {
1487 GA
= cast
<GlobalAddressSDNode
>(Base
);
1488 Idx
= DAG
.getConstant(0, DL
, MVT::i32
);
1490 GA
= dyn_cast
<GlobalAddressSDNode
>(Base
->getOperand(0));
1492 // We are in Case 2 above.
1493 Idx
= Base
->getOperand(1);
1494 assert(GA
->getNumValues() == 1);
1496 // This might be Case 1 above (or an error)
1497 SDValue V
= Base
->getOperand(0);
1498 GA
= dyn_cast
<GlobalAddressSDNode
>(V
->getOperand(1));
1500 if (V
->getOpcode() != ISD::ADD
|| V
->getNumOperands() != 2 || !GA
)
1503 SDValue IdxV
= DAG
.getNode(ISD::ADD
, DL
, MVT::i32
, Base
->getOperand(1),

SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  const SDValue &Value = SN->getValue();
  const SDValue &Base = SN->getBasePtr();
  const SDValue &Offset = SN->getOffset();

  if (IsWebAssemblyTableWithOffset(Base)) {
    if (!Offset->isUndef())
      report_fatal_error(
          "unexpected offset when loading from webassembly table", false);

    SDValue Idx;
    GlobalAddressSDNode *GA;

    if (!MatchTableForLowering(DAG, DL, Base, GA, Idx))
      report_fatal_error("failed pattern matching for lowering table store",
                         false);

    SDVTList Tys = DAG.getVTList(MVT::Other);
    SDValue TableSetOps[] = {SN->getChain(), SDValue(GA, 0), Idx, Value};
    SDValue TableSet =
        DAG.getMemIntrinsicNode(WebAssemblyISD::TABLE_SET, DL, Tys,
                                TableSetOps, SN->getMemoryVT(),
                                SN->getMemOperand());
    return TableSet;
  }

  if (IsWebAssemblyGlobal(Base)) {
    if (!Offset->isUndef())
      report_fatal_error("unexpected offset when storing to webassembly global",
                         false);

    SDVTList Tys = DAG.getVTList(MVT::Other);
    SDValue Ops[] = {SN->getChain(), Value, Base};
    return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
                                   SN->getMemoryVT(), SN->getMemOperand());
  }

  if (Optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
    if (!Offset->isUndef())
      report_fatal_error("unexpected offset when storing to webassembly local",
                         false);

    SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
    SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
    SDValue Ops[] = {SN->getChain(), Idx, Value};
    return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
  }

  return Op;
}
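
// LowerLoad mirrors LowerStore: table accesses become TABLE_GET (table.get),
// wasm globals become GLOBAL_GET (global.get), and stack objects mapped to
// locals become LOCAL_GET (local.get) merged with the incoming chain. This is
// a summary of the cases handled below, not an exhaustive specification.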

SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  const SDValue &Base = LN->getBasePtr();
  const SDValue &Offset = LN->getOffset();

  if (IsWebAssemblyTableWithOffset(Base)) {
    if (!Offset->isUndef())
      report_fatal_error(
          "unexpected offset when loading from webassembly table", false);

    GlobalAddressSDNode *GA;
    SDValue Idx;

    if (!MatchTableForLowering(DAG, DL, Base, GA, Idx))
      report_fatal_error("failed pattern matching for lowering table load",
                         false);

    SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
    SDValue TableGetOps[] = {LN->getChain(), SDValue(GA, 0), Idx};
    SDValue TableGet =
        DAG.getMemIntrinsicNode(WebAssemblyISD::TABLE_GET, DL, Tys,
                                TableGetOps, LN->getMemoryVT(),
                                LN->getMemOperand());
    return TableGet;
  }

  if (IsWebAssemblyGlobal(Base)) {
    if (!Offset->isUndef())
      report_fatal_error(
          "unexpected offset when loading from webassembly global", false);

    SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
    SDValue Ops[] = {LN->getChain(), Base};
    return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
                                   LN->getMemoryVT(), LN->getMemOperand());
  }

  if (Optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
    if (!Offset->isUndef())
      report_fatal_error(
          "unexpected offset when loading from webassembly local", false);

    SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
    EVT LocalVT = LN->getValueType(0);
    SDValue LocalGet = DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, LocalVT,
                                   {LN->getChain(), Idx});
    SDValue Result = DAG.getMergeValues({LocalGet, LN->getChain()}, DL);
    assert(Result->getNumValues() == 2 && "Loads must carry a chain!");
    return Result;
  }

  return Op;
}

SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDValue Src = Op.getOperand(2);
  if (isa<FrameIndexSDNode>(Src.getNode())) {
    // CopyToReg nodes don't support FrameIndex operands. Other targets select
    // the FI to some LEA-like instruction, but since we don't have that, we
    // need to insert some kind of instruction that can take an FI operand and
    // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
    // local.copy between Op and its FI operand.
    SDValue Chain = Op.getOperand(0);
    SDLoc DL(Op);
    Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
    EVT VT = Src.getValueType();
    SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
                                                   : WebAssembly::COPY_I64,
                                    DL, VT, Src),
                 0);
    return Op.getNode()->getNumValues() == 1
               ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
               : DAG.getCopyToReg(Chain, DL, Reg, Copy,
                                  Op.getNumOperands() == 4 ? Op.getOperand(3)
                                                           : SDValue());
  }
  return SDValue();
}

SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
                                                   SelectionDAG &DAG) const {
  int FI = cast<FrameIndexSDNode>(Op)->getIndex();
  return DAG.getTargetFrameIndex(FI, Op.getValueType());
}

SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);

  if (!Subtarget->getTargetTriple().isOSEmscripten()) {
    fail(DL, DAG,
         "Non-Emscripten WebAssembly hasn't implemented "
         "__builtin_return_address");
    return SDValue();
  }

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  unsigned Depth = Op.getConstantOperandVal(0);
  MakeLibCallOptions CallOptions;
  return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
                     {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
      .first;
}

SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Non-zero depths are not supported by WebAssembly currently. Use the
  // legalizer's default expansion, which is to return 0 (what this function is
  // documented to do).
  if (Op.getConstantOperandVal(0) > 0)
    return SDValue();

  DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
  EVT VT = Op.getValueType();
  Register FP =
      Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
}
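
// Rough shape of the DSO-local TLS lowering below, written as wasm-like
// pseudocode for the wasm32 case (illustrative only):
//   global.get __tls_base
//   i32.const  <TLS-relative offset of the variable>
//   i32.add
// Variables that are not DSO-local are instead addressed through a
// GOT.TLS-relative wrapper.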

SDValue
WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *GA = cast<GlobalAddressSDNode>(Op);

  MachineFunction &MF = DAG.getMachineFunction();
  if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
    report_fatal_error("cannot use thread-local storage without bulk memory",
                       false);

  const GlobalValue *GV = GA->getGlobal();

  // Currently Emscripten does not support dynamic linking with threads.
  // Therefore, if we have thread-local storage, only the local-exec model
  // is possible.
  // TODO: remove this and implement proper TLS models once Emscripten
  // supports dynamic linking with threads.
  if (GV->getThreadLocalMode() != GlobalValue::LocalExecTLSModel &&
      !Subtarget->getTargetTriple().isOSEmscripten()) {
    report_fatal_error("only -ftls-model=local-exec is supported for now on "
                       "non-Emscripten OSes: variable " +
                           GV->getName(),
                       false);
  }

  auto model = GV->getThreadLocalMode();

  // Unsupported TLS modes
  assert(model != GlobalValue::NotThreadLocal);
  assert(model != GlobalValue::InitialExecTLSModel);

  if (model == GlobalValue::LocalExecTLSModel ||
      model == GlobalValue::LocalDynamicTLSModel ||
      (model == GlobalValue::GeneralDynamicTLSModel &&
       getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))) {
    // For DSO-local TLS variables we use offset from __tls_base

    MVT PtrVT = getPointerTy(DAG.getDataLayout());
    auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
                                       : WebAssembly::GLOBAL_GET_I32;
    const char *BaseName = MF.createExternalSymbolName("__tls_base");

    SDValue BaseAddr(
        DAG.getMachineNode(GlobalGet, DL, PtrVT,
                           DAG.getTargetExternalSymbol(BaseName, PtrVT)),
        0);

    SDValue TLSOffset = DAG.getTargetGlobalAddress(
        GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
    SDValue SymOffset =
        DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);

    return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
  }

  assert(model == GlobalValue::GeneralDynamicTLSModel);

  EVT VT = Op.getValueType();
  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
                                                GA->getOffset(),
                                                WebAssemblyII::MO_GOT_TLS));
}

SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *GA = cast<GlobalAddressSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(GA->getTargetFlags() == 0 &&
         "Unexpected target flags on generic GlobalAddressSDNode");
  if (!WebAssembly::isValidAddressSpace(GA->getAddressSpace()))
    fail(DL, DAG, "Invalid address space for WebAssembly target");

  unsigned OperandFlags = 0;
  if (isPositionIndependent()) {
    const GlobalValue *GV = GA->getGlobal();
    if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) {
      MachineFunction &MF = DAG.getMachineFunction();
      MVT PtrVT = getPointerTy(MF.getDataLayout());
      const char *BaseName;
      if (GV->getValueType()->isFunctionTy()) {
        BaseName = MF.createExternalSymbolName("__table_base");
        OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
      } else {
        BaseName = MF.createExternalSymbolName("__memory_base");
        OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
      }

      SDValue BaseAddr =
          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                      DAG.getTargetExternalSymbol(BaseName, PtrVT));

      SDValue SymAddr = DAG.getNode(
          WebAssemblyISD::WrapperREL, DL, VT,
          DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
                                     OperandFlags));

      return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
    }
    OperandFlags = WebAssemblyII::MO_GOT;
  }

  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
                                                GA->getOffset(), OperandFlags));
}

SDValue
WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *ES = cast<ExternalSymbolSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(ES->getTargetFlags() == 0 &&
         "Unexpected target flags on generic ExternalSymbolSDNode");
  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
}

SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // There's no need for a Wrapper node because we always incorporate a jump
  // table operand into a BR_TABLE instruction, rather than ever
  // materializing it in a register.
  const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
                                JT->getTargetFlags());
}
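
// The BR_TABLE node built below takes, in order: the chain, the index, one
// basic-block operand per jump-table entry, and finally a dummy default
// destination (a copy of the first entry) that a later pass may replace.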

SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
  SDValue Index = Op.getOperand(2);
  assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Index);

  MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
  const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;

  // Add an operand for each case.
  for (auto MBB : MBBs)
    Ops.push_back(DAG.getBasicBlock(MBB));

  // Add the first MBB as a dummy default target for now. This will be replaced
  // with the proper default target (and the preceding range check eliminated)
  // if possible by WebAssemblyFixBrTableDefaults.
  Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
  return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
}

SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());

  auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
                                    MFI->getVarargBufferVreg(), PtrVT);
  return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
                                                  SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned IntNo;
  switch (Op.getOpcode()) {
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN:
    IntNo = Op.getConstantOperandVal(1);
    break;
  case ISD::INTRINSIC_WO_CHAIN:
    IntNo = Op.getConstantOperandVal(0);
    break;
  default:
    llvm_unreachable("Invalid intrinsic");
  }
  SDLoc DL(Op);

  switch (IntNo) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.

  case Intrinsic::wasm_lsda: {
    auto PtrVT = getPointerTy(MF.getDataLayout());
    const char *SymName = MF.createExternalSymbolName(
        "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
    if (isPositionIndependent()) {
      SDValue Node = DAG.getTargetExternalSymbol(
          SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
      const char *BaseName = MF.createExternalSymbolName("__memory_base");
      SDValue BaseAddr =
          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                      DAG.getTargetExternalSymbol(BaseName, PtrVT));
      SDValue SymAddr =
          DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
      return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
    }
    SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
    return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
  }

  case Intrinsic::wasm_shuffle: {
    // Drop in-chain and replace undefs, but otherwise pass through unchanged
    SDValue Ops[18];
    size_t OpIdx = 0;
    Ops[OpIdx++] = Op.getOperand(1);
    Ops[OpIdx++] = Op.getOperand(2);
    while (OpIdx < 18) {
      const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
      if (MaskIdx.isUndef() ||
          cast<ConstantSDNode>(MaskIdx.getNode())->getZExtValue() >= 32) {
        Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32);
      } else {
        Ops[OpIdx++] = MaskIdx;
      }
    }
    return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
  }
  }
}
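
// Sketch of the rewrite performed below, assuming an i8 lane extracted from a
// vector with wider lanes: the source is bitcast to the matching 128-bit
// vector of i8 lanes and the lane index is scaled by the ratio of lane counts
// so that a simple extract_lane_s pattern can match. For example
// (illustrative), lane 1 of a v4i32 source becomes lane 4 of its v16i8 view.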

SDValue
WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // If sign extension operations are disabled, allow sext_inreg only if operand
  // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
  // extension operations, but allowing sext_inreg in this context lets us have
  // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
  // everywhere would be simpler in this file, but would necessitate large and
  // brittle patterns to undo the expansion and select extract_lane_s
  // instructions.
  assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
  if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  const SDValue &Extract = Op.getOperand(0);
  MVT VecT = Extract.getOperand(0).getSimpleValueType();
  if (VecT.getVectorElementType().getSizeInBits() > 32)
    return SDValue();
  MVT ExtractedLaneT =
      cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
  MVT ExtractedVecT =
      MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
  if (ExtractedVecT == VecT)
    return Op;

  // Bitcast vector to appropriate type to ensure ISel pattern coverage
  const SDNode *Index = Extract.getOperand(1).getNode();
  if (!isa<ConstantSDNode>(Index))
    return SDValue();
  unsigned IndexVal = cast<ConstantSDNode>(Index)->getZExtValue();
  unsigned Scale =
      ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
  assert(Scale > 1);
  SDValue NewIndex =
      DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
  SDValue NewExtract = DAG.getNode(
      ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
      DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
                     Op.getOperand(1));
}

static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  if (Op.getValueType() != MVT::v2f64)
    return SDValue();

  auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
                             unsigned &Index) -> bool {
    switch (Op.getOpcode()) {
    case ISD::SINT_TO_FP:
      Opcode = WebAssemblyISD::CONVERT_LOW_S;
      break;
    case ISD::UINT_TO_FP:
      Opcode = WebAssemblyISD::CONVERT_LOW_U;
      break;
    case ISD::FP_EXTEND:
      Opcode = WebAssemblyISD::PROMOTE_LOW;
      break;
    default:
      return false;
    }

    auto ExtractVector = Op.getOperand(0);
    if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return false;

    if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
      return false;

    SrcVec = ExtractVector.getOperand(0);
    Index = ExtractVector.getConstantOperandVal(1);
    return true;
  };

  unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
  SDValue LHSSrcVec, RHSSrcVec;
  if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
      !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
    return SDValue();

  if (LHSOpcode != RHSOpcode)
    return SDValue();

  MVT ExpectedSrcVT;
  switch (LHSOpcode) {
  case WebAssemblyISD::CONVERT_LOW_S:
  case WebAssemblyISD::CONVERT_LOW_U:
    ExpectedSrcVT = MVT::v4i32;
    break;
  case WebAssemblyISD::PROMOTE_LOW:
    ExpectedSrcVT = MVT::v4f32;
    break;
  }
  if (LHSSrcVec.getValueType() != ExpectedSrcVT)
    return SDValue();

  auto Src = LHSSrcVec;
  if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
    // Shuffle the source vector so that the converted lanes are the low lanes.
    Src = DAG.getVectorShuffle(
        ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
        {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
  }
  return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
}
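
// LowerConvertLow above recognizes, e.g., a v2f64 build_vector whose two lanes
// are sint_to_fp of lanes 0 and 1 of one v4i32 source and emits a single
// CONVERT_LOW_S node; when the converted lanes come from other positions or
// from two different sources, it first shuffles them into the low lanes.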

SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {
  if (auto ConvertLow = LowerConvertLow(Op, DAG))
    return ConvertLow;

  SDLoc DL(Op);
  const EVT VecT = Op.getValueType();
  const EVT LaneT = Op.getOperand(0).getValueType();
  const size_t Lanes = Op.getNumOperands();
  bool CanSwizzle = VecT == MVT::v16i8;

  // BUILD_VECTORs are lowered to the instruction that initializes the highest
  // possible number of lanes at once followed by a sequence of replace_lane
  // instructions to individually initialize any remaining lanes.

  // TODO: Tune this. For example, lanewise swizzling is very expensive, so
  // swizzled lanes should be given greater weight.

  // TODO: Investigate looping rather than always extracting/replacing specific
  // lanes to fill gaps.

  auto IsConstant = [](const SDValue &V) {
    return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
  };

  // Returns the source vector and index vector pair if they exist. Checks for:
  //   (extract_vector_elt
  //     $src,
  //     (sign_extend_inreg (extract_vector_elt $indices, $i))
  //   )
  auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
    auto Bail = std::make_pair(SDValue(), SDValue());
    if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleSrc = Lane->getOperand(0);
    const SDValue &IndexExt = Lane->getOperand(1);
    if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
      return Bail;
    const SDValue &Index = IndexExt->getOperand(0);
    if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleIndices = Index->getOperand(0);
    if (SwizzleSrc.getValueType() != MVT::v16i8 ||
        SwizzleIndices.getValueType() != MVT::v16i8 ||
        Index->getOperand(1)->getOpcode() != ISD::Constant ||
        Index->getConstantOperandVal(1) != I)
      return Bail;
    return std::make_pair(SwizzleSrc, SwizzleIndices);
  };

  // If the lane is extracted from another vector at a constant index, return
  // that vector. The source vector must not have more lanes than the dest
  // because the shufflevector indices are in terms of the destination lanes and
  // would not be able to address the smaller individual source lanes.
  auto GetShuffleSrc = [&](const SDValue &Lane) {
    if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return SDValue();
    if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
      return SDValue();
    if (Lane->getOperand(0).getValueType().getVectorNumElements() >
        VecT.getVectorNumElements())
      return SDValue();
    return Lane->getOperand(0);
  };

  using ValueEntry = std::pair<SDValue, size_t>;
  SmallVector<ValueEntry, 16> SplatValueCounts;

  using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
  SmallVector<SwizzleEntry, 16> SwizzleCounts;

  using ShuffleEntry = std::pair<SDValue, size_t>;
  SmallVector<ShuffleEntry, 16> ShuffleCounts;

  auto AddCount = [](auto &Counts, const auto &Val) {
    auto CountIt =
        llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
    if (CountIt == Counts.end()) {
      Counts.emplace_back(Val, 1);
    } else {
      CountIt->second++;
    }
  };

  auto GetMostCommon = [](auto &Counts) {
    auto CommonIt =
        std::max_element(Counts.begin(), Counts.end(),
                         [](auto A, auto B) { return A.second < B.second; });
    assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
    return *CommonIt;
  };

  size_t NumConstantLanes = 0;

  // Count eligible lanes for each type of vector creation op
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (Lane.isUndef())
      continue;

    AddCount(SplatValueCounts, Lane);

    if (IsConstant(Lane))
      NumConstantLanes++;
    if (auto ShuffleSrc = GetShuffleSrc(Lane))
      AddCount(ShuffleCounts, ShuffleSrc);
    if (CanSwizzle) {
      auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
      if (SwizzleSrcs.first)
        AddCount(SwizzleCounts, SwizzleSrcs);
    }
  }

  SDValue SplatValue;
  size_t NumSplatLanes;
  std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);

  SDValue SwizzleSrc;
  SDValue SwizzleIndices;
  size_t NumSwizzleLanes = 0;
  if (SwizzleCounts.size())
    std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
                          NumSwizzleLanes) = GetMostCommon(SwizzleCounts);

  // Shuffles can draw from up to two vectors, so find the two most common
  // sources.
  SDValue ShuffleSrc1, ShuffleSrc2;
  size_t NumShuffleLanes = 0;
  if (ShuffleCounts.size()) {
    std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
    llvm::erase_if(ShuffleCounts,
                   [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
  }
  if (ShuffleCounts.size()) {
    size_t AdditionalShuffleLanes;
    std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
        GetMostCommon(ShuffleCounts);
    NumShuffleLanes += AdditionalShuffleLanes;
  }

  // Predicate returning true if the lane is properly initialized by the
  // original instruction
  std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
  SDValue Result;
  // Prefer swizzles over shuffles over vector consts over splats
  if (NumSwizzleLanes >= NumShuffleLanes &&
      NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
    Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
                         SwizzleIndices);
    auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
    IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
      return Swizzled == GetSwizzleSrcs(I, Lane);
    };
  } else if (NumShuffleLanes >= NumConstantLanes &&
             NumShuffleLanes >= NumSplatLanes) {
    size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
    size_t DestLaneCount = VecT.getVectorNumElements();
    size_t Scale1 = 1;
    size_t Scale2 = 1;
    SDValue Src1 = ShuffleSrc1;
    SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
    if (Src1.getValueType() != VecT) {
      size_t LaneSize =
          Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
      assert(LaneSize > DestLaneSize);
      Scale1 = LaneSize / DestLaneSize;
      Src1 = DAG.getBitcast(VecT, Src1);
    }
    if (Src2.getValueType() != VecT) {
      size_t LaneSize =
          Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
      assert(LaneSize > DestLaneSize);
      Scale2 = LaneSize / DestLaneSize;
      Src2 = DAG.getBitcast(VecT, Src2);
    }

    int Mask[16];
    assert(DestLaneCount <= 16);
    for (size_t I = 0; I < DestLaneCount; ++I) {
      const SDValue &Lane = Op->getOperand(I);
      SDValue Src = GetShuffleSrc(Lane);
      if (Src == ShuffleSrc1) {
        Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
      } else if (Src && Src == ShuffleSrc2) {
        Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
      } else {
        Mask[I] = -1;
      }
    }
    ArrayRef<int> MaskRef(Mask, DestLaneCount);
    Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
    IsLaneConstructed = [&](size_t, const SDValue &Lane) {
      auto Src = GetShuffleSrc(Lane);
      return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
    };
  } else if (NumConstantLanes >= NumSplatLanes) {
    SmallVector<SDValue, 16> ConstLanes;
    for (const SDValue &Lane : Op->op_values()) {
      if (IsConstant(Lane)) {
        // Values may need to be fixed so that they will sign extend to be
        // within the expected range during ISel. Check whether the value is in
        // bounds based on the lane bit width and if it is out of bounds, lop
        // off the extra bits and subtract 2^n to reflect giving the high bit
        // value -2^(n-1) rather than +2^(n-1). Skip the i64 case because it
        // cannot possibly be out of range.
        auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode());
        int64_t Val = Const ? Const->getSExtValue() : 0;
        uint64_t LaneBits = 128 / Lanes;
        assert((LaneBits == 64 || Val >= -(1ll << (LaneBits - 1))) &&
               "Unexpected out of bounds negative value");
        if (Const && LaneBits != 64 && Val > (1ll << (LaneBits - 1)) - 1) {
          auto NewVal = ((uint64_t)Val % (1ll << LaneBits)) - (1ll << LaneBits);
          ConstLanes.push_back(DAG.getConstant(NewVal, SDLoc(Lane), LaneT));
        } else {
          ConstLanes.push_back(Lane);
        }
      } else if (LaneT.isFloatingPoint()) {
        ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
      } else {
        ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
      }
    }
    Result = DAG.getBuildVector(VecT, DL, ConstLanes);
    IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
      return IsConstant(Lane);
    };
  } else {
    // Use a splat, but possibly a load_splat
    LoadSDNode *SplattedLoad;
    if ((SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
        SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
      Result = DAG.getMemIntrinsicNode(
          WebAssemblyISD::LOAD_SPLAT, DL, DAG.getVTList(VecT),
          {SplattedLoad->getChain(), SplattedLoad->getBasePtr(),
           SplattedLoad->getOffset()},
          SplattedLoad->getMemoryVT(), SplattedLoad->getMemOperand());
    } else {
      Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
    }
    IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
      return Lane == SplatValue;
    };
  }

  assert(Result);
  assert(IsLaneConstructed);

  // Add replace_lane instructions for any unhandled values
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
                           DAG.getConstant(I, DL, MVT::i32));
  }

  return Result;
}
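
// The shuffle lowering below expands each lane index in the mask into
// LaneBytes consecutive byte indices. For example (illustrative), a v4i32
// shuffle selecting lane 5 emits the byte indices 20, 21, 22, 23, and an
// undef lane (-1) emits four zeros.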

SDValue
WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
  MVT VecType = Op.getOperand(0).getSimpleValueType();
  assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
  size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;

  // Space for two vector args and sixteen mask indices
  SDValue Ops[18];
  size_t OpIdx = 0;
  Ops[OpIdx++] = Op.getOperand(0);
  Ops[OpIdx++] = Op.getOperand(1);

  // Expand mask indices to byte indices and materialize them as operands
  for (int M : Mask) {
    for (size_t J = 0; J < LaneBytes; ++J) {
      // Lower undefs (represented by -1 in mask) to zero
      uint64_t ByteIndex = M == -1 ? 0 : (uint64_t)M * LaneBytes + J;
      Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
    }
  }

  return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
}
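
// LowerSETCC below unrolls an unsupported v2i64 comparison into two scalar
// SELECT_CC nodes that produce all-ones or all-zero per lane and then rebuilds
// the vector, matching the all-ones/all-zero convention of SIMD comparisons.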

SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // The legalizer does not know how to expand the unsupported comparison modes
  // of i64x2 vectors, so we manually unroll them here.
  assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
  SmallVector<SDValue, 2> LHS, RHS;
  DAG.ExtractVectorElements(Op->getOperand(0), LHS);
  DAG.ExtractVectorElements(Op->getOperand(1), RHS);
  const SDValue &CC = Op->getOperand(2);
  auto MakeLane = [&](unsigned I) {
    return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
                       DAG.getConstant(uint64_t(-1), DL, MVT::i64),
                       DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
  };
  return DAG.getBuildVector(Op->getValueType(0), DL,
                            {MakeLane(0), MakeLane(1)});
}

SDValue
WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Allow constant lane indices, expand variable lane indices
  SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
  if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef())
    return Op;
  else
    // Perform default expansion
    return SDValue();
}
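
// Note on the masking in unrollVectorShift below: the i32 shifts used for the
// unrolled lanes do not wrap the shift amount at the narrow lane width, so
// each amount is first ANDed with LaneT.getSizeInBits() - 1 (7 for i8 lanes).
// For example (illustrative), shifting an i8 lane by 9 behaves as a shift
// by 1.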

static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
  EVT LaneT = Op.getSimpleValueType().getVectorElementType();
  // 32-bit and 64-bit unrolled shifts will have proper semantics
  if (LaneT.bitsGE(MVT::i32))
    return DAG.UnrollVectorOp(Op.getNode());
  // Otherwise mask the shift value to get proper semantics from 32-bit shift
  SDLoc DL(Op);
  size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
  SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
  unsigned ShiftOpcode = Op.getOpcode();
  SmallVector<SDValue, 16> ShiftedElements;
  DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
  SmallVector<SDValue, 16> ShiftElements;
  DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
  SmallVector<SDValue, 16> UnrolledOps;
  for (size_t i = 0; i < NumLanes; ++i) {
    SDValue MaskedShiftValue =
        DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
    SDValue ShiftedValue = ShiftedElements[i];
    if (ShiftOpcode == ISD::SRA)
      ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
                                 ShiftedValue, DAG.getValueType(LaneT));
    UnrolledOps.push_back(
        DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
  }
  return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
}
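
// LowerShift below handles only whole-vector shifts by a splatted amount,
// mapping the shift opcodes onto VEC_SHL/VEC_SHR_S/VEC_SHR_U; non-splat shift
// amounts fall back to unrollVectorShift above.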

SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Only manually lower vector shifts
  assert(Op.getSimpleValueType().isVector());

  auto ShiftVal = DAG.getSplatValue(Op.getOperand(1));
  if (!ShiftVal)
    return unrollVectorShift(Op, DAG);

  // Use anyext because none of the high bits can affect the shift
  ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);

  unsigned Opcode;
  switch (Op.getOpcode()) {
  case ISD::SHL:
    Opcode = WebAssemblyISD::VEC_SHL;
    break;
  case ISD::SRA:
    Opcode = WebAssemblyISD::VEC_SHR_S;
    break;
  case ISD::SRL:
    Opcode = WebAssemblyISD::VEC_SHR_U;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }

  return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
}

SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
                                                      SelectionDAG &DAG) const {
  EVT ResT = Op.getValueType();
  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

  if ((ResT == MVT::i32 || ResT == MVT::i64) &&
      (SatVT == MVT::i32 || SatVT == MVT::i64))
    return Op;

  if (ResT == MVT::v4i32 && SatVT == MVT::i32)
    return Op;

  return SDValue();
}

//===----------------------------------------------------------------------===//
// Custom DAG combine hooks
//===----------------------------------------------------------------------===//
static SDValue
performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;
  auto Shuffle = cast<ShuffleVectorSDNode>(N);

  // Hoist vector bitcasts that don't change the number of lanes out of unary
  // shuffles, where they are less likely to get in the way of other combines.
  // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
  // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
  SDValue Bitcast = N->getOperand(0);
  if (Bitcast.getOpcode() != ISD::BITCAST)
    return SDValue();
  if (!N->getOperand(1).isUndef())
    return SDValue();
  SDValue CastOp = Bitcast.getOperand(0);
  MVT SrcType = CastOp.getSimpleValueType();
  MVT DstType = Bitcast.getSimpleValueType();
  if (!SrcType.is128BitVector() ||
      SrcType.getVectorNumElements() != DstType.getVectorNumElements())
    return SDValue();
  SDValue NewShuffle = DAG.getVectorShuffle(
      SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
  return DAG.getBitcast(DstType, NewShuffle);
}
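
// For example (illustrative), (zero_extend (v8i8 (extract_subvector
// (v16i8 $x), 8))) is combined below into an EXTEND_HIGH_U of $x, avoiding an
// expensive expansion of the extract_subvector.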

static SDValue
performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;
  assert(N->getOpcode() == ISD::SIGN_EXTEND ||
         N->getOpcode() == ISD::ZERO_EXTEND);

  // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
  // possible before the extract_subvector can be expanded.
  auto Extract = N->getOperand(0);
  if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return SDValue();
  auto Source = Extract.getOperand(0);
  auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
  if (IndexNode == nullptr)
    return SDValue();
  auto Index = IndexNode->getZExtValue();

  // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
  // extracted subvector is the low or high half of its source.
  EVT ResVT = N->getValueType(0);
  if (ResVT == MVT::v8i16) {
    if (Extract.getValueType() != MVT::v8i8 ||
        Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
      return SDValue();
  } else if (ResVT == MVT::v4i32) {
    if (Extract.getValueType() != MVT::v4i16 ||
        Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
      return SDValue();
  } else if (ResVT == MVT::v2i64) {
    if (Extract.getValueType() != MVT::v2i32 ||
        Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
      return SDValue();
  } else {
    return SDValue();
  }

  bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
  bool IsLow = Index == 0;

  unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
                                : WebAssemblyISD::EXTEND_HIGH_S)
                       : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
                                : WebAssemblyISD::EXTEND_HIGH_U);

  return DAG.getNode(Op, SDLoc(N), ResVT, Source);
}

static SDValue
performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;

  auto GetWasmConversionOp = [](unsigned Op) {
    switch (Op) {
    case ISD::FP_TO_SINT_SAT:
      return WebAssemblyISD::TRUNC_SAT_ZERO_S;
    case ISD::FP_TO_UINT_SAT:
      return WebAssemblyISD::TRUNC_SAT_ZERO_U;
    case ISD::FP_ROUND:
      return WebAssemblyISD::DEMOTE_ZERO;
    }
    llvm_unreachable("unexpected op");
  };

  auto IsZeroSplat = [](SDValue SplatVal) {
    auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
    APInt SplatValue, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    return Splat &&
           Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                                  HasAnyUndefs) &&
           SplatValue == 0;
  };

  if (N->getOpcode() == ISD::CONCAT_VECTORS) {
    // Combine this:
    //
    //   (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
    //
    // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
    //
    // Or this:
    //
    //   (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
    //
    // into (f32x4.demote_zero_f64x2 $x).
    EVT ResVT;
    EVT ExpectedConversionType;
    auto Conversion = N->getOperand(0);
    auto ConversionOp = Conversion.getOpcode();
    switch (ConversionOp) {
    case ISD::FP_TO_SINT_SAT:
    case ISD::FP_TO_UINT_SAT:
      ResVT = MVT::v4i32;
      ExpectedConversionType = MVT::v2i32;
      break;
    case ISD::FP_ROUND:
      ResVT = MVT::v4f32;
      ExpectedConversionType = MVT::v2f32;
      break;
    default:
      return SDValue();
    }

    if (N->getValueType(0) != ResVT)
      return SDValue();

    if (Conversion.getValueType() != ExpectedConversionType)
      return SDValue();

    auto Source = Conversion.getOperand(0);
    if (Source.getValueType() != MVT::v2f64)
      return SDValue();

    if (!IsZeroSplat(N->getOperand(1)) ||
        N->getOperand(1).getValueType() != ExpectedConversionType)
      return SDValue();

    unsigned Op = GetWasmConversionOp(ConversionOp);
    return DAG.getNode(Op, SDLoc(N), ResVT, Source);
  }

  // Combine this:
  //
  //   (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
  //
  // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
  //
  // Or this:
  //
  //   (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
  //
  // into (f32x4.demote_zero_f64x2 $x).
  EVT ResVT;
  auto ConversionOp = N->getOpcode();
  switch (ConversionOp) {
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    ResVT = MVT::v4i32;
    break;
  case ISD::FP_ROUND:
    ResVT = MVT::v4f32;
    break;
  default:
    llvm_unreachable("unexpected op");
  }

  if (N->getValueType(0) != ResVT)
    return SDValue();

  auto Concat = N->getOperand(0);
  if (Concat.getValueType() != MVT::v4f64)
    return SDValue();

  auto Source = Concat.getOperand(0);
  if (Source.getValueType() != MVT::v2f64)
    return SDValue();

  if (!IsZeroSplat(Concat.getOperand(1)) ||
      Concat.getOperand(1).getValueType() != MVT::v2f64)
    return SDValue();

  unsigned Op = GetWasmConversionOp(ConversionOp);
  return DAG.getNode(Op, SDLoc(N), ResVT, Source);
}

// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
                                const SDLoc &DL, unsigned VectorWidth) {
  EVT VT = Vec.getValueType();
  EVT ElVT = VT.getVectorElementType();
  unsigned Factor = VT.getSizeInBits() / VectorWidth;
  EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
                                  VT.getVectorNumElements() / Factor);

  // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
  unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
  assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");

  // This is the index of the first element of the VectorWidth-bit chunk
  // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
  IdxVal &= ~(ElemsPerChunk - 1);

  // If the input is a buildvector just emit a smaller one.
  if (Vec.getOpcode() == ISD::BUILD_VECTOR)
    return DAG.getBuildVector(ResultVT, DL,
                              Vec->ops().slice(IdxVal, ElemsPerChunk));

  SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
}

// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
// is the expected destination value type after recursion. In is the initial
// input. Note that the input should have enough leading zero bits to prevent
// NARROW_U from saturating results.
static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL,
                                        SelectionDAG &DAG) {
  EVT SrcVT = In.getValueType();

  // No truncation required, we might get here due to recursive calls.
  if (SrcVT == DstVT)
    return In;

  unsigned SrcSizeInBits = SrcVT.getSizeInBits();
  unsigned NumElems = SrcVT.getVectorNumElements();
  if (!isPowerOf2_32(NumElems))
    return SDValue();
  assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
  assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");

  LLVMContext &Ctx = *DAG.getContext();
  EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);

  // Narrow to the largest type possible:
  // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
  EVT InVT = MVT::i16, OutVT = MVT::i8;
  if (SrcVT.getScalarSizeInBits() > 16) {
    InVT = MVT::i32;
    OutVT = MVT::i16;
  }
  unsigned SubSizeInBits = SrcSizeInBits / 2;
  InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
  OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());

  // Split lower/upper subvectors.
  SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
  SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);

  // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
  if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
    Lo = DAG.getBitcast(InVT, Lo);
    Hi = DAG.getBitcast(InVT, Hi);
    SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
    return DAG.getBitcast(DstVT, Res);
  }

  // Recursively narrow lower/upper subvectors, concat result and narrow again.
  EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
  Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
  Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);

  PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
  SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
  return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
}
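
// The truncate combine below first masks the input down to the low destination
// bits, so the unsigned narrowing cannot saturate, and then reuses
// truncateVectorWithNARROW. For example (illustrative), a v8i32 -> v8i16
// truncate becomes an AND with 0xFFFF followed by a single NARROW_U
// (i16x8.narrow_i32x4_u) of the two halves.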
static SDValue performTruncateCombine(SDNode *N,
                                      TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;

  SDValue In = N->getOperand(0);
  EVT InVT = In.getValueType();
  if (!InVT.isSimple())
    return SDValue();

  EVT OutVT = N->getValueType(0);
  if (!OutVT.isVector())
    return SDValue();

  EVT OutSVT = OutVT.getVectorElementType();
  EVT InSVT = InVT.getVectorElementType();
  // Currently only cover truncate to v16i8 or v8i16.
  if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
        (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
    return SDValue();

  SDLoc DL(N);
  APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
                                    OutVT.getScalarSizeInBits());
  In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
  return truncateVectorWithNARROW(OutVT, In, DL, DAG);
}

SDValue
WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  switch (N->getOpcode()) {
  default:
    return SDValue();
  case ISD::VECTOR_SHUFFLE:
    return performVECTOR_SHUFFLECombine(N, DCI);
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
    return performVectorExtendCombine(N, DCI);
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
  case ISD::FP_ROUND:
  case ISD::CONCAT_VECTORS:
    return performVectorTruncZeroCombine(N, DCI);
  case ISD::TRUNCATE:
    return performTruncateCombine(N, DCI);
  }
}