1 //=== WebAssemblyLowerEmscriptenEHSjLj.cpp - Lower exceptions for Emscripten =//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file lowers exception-related instructions and setjmp/longjmp
11 /// function calls in order to use Emscripten's JavaScript try and catch
14 /// To handle exceptions and setjmp/longjmps, this scheme relies on JavaScript's
15 /// try and catch syntax and relevant exception-related libraries implemented
16 /// in JavaScript glue code that will be produced by Emscripten.
18 /// * Exception handling
19 /// This pass lowers invokes and landingpads into library functions in JS glue
20 /// code. Invokes are lowered into function wrappers called invoke wrappers that
21 /// exist in JS side, which wrap the original function call with JS try-catch.
22 /// If an exception occurred, cxa_throw() function in JS side sets some
23 /// variables (see below) so we can check whether an exception occurred from
24 /// wasm code and handle it appropriately.
26 /// * Setjmp-longjmp handling
27 /// This pass lowers setjmp to a reasonably-performant approach for emscripten.
28 /// The idea is that each block with a setjmp is broken up into two parts: the
29 /// part containing setjmp and the part right after the setjmp. The latter part
30 /// is either reached from the setjmp, or later from a longjmp. To handle the
31 /// longjmp, all calls that might longjmp are also called using invoke wrappers
32 /// and thus JS / try-catch. JS longjmp() function also sets some variables so
33 /// we can check whether a longjmp occurred from wasm code. Each block with a
34 /// function call that might longjmp is also split up after the longjmp call.
35 /// After the longjmp call, we check whether a longjmp occurred, and if it did,
36 /// which setjmp it corresponds to, and jump to the right post-setjmp block.
37 /// We assume setjmp-longjmp handling always runs after EH handling, which means
38 /// we don't expect any exception-related instructions when SjLj runs.
39 /// FIXME Currently this scheme does not support indirect call of setjmp,
40 /// because of the limitation of the scheme itself. fastcomp does not support it
43 /// In detail, this pass does the following things:
45 /// 1) Assumes the existence of global variables: __THREW__, __threwValue
46 /// __THREW__ and __threwValue are defined in compiler-rt in Emscripten.
47 /// These variables are used for both exceptions and setjmp/longjmps.
48 /// __THREW__ indicates whether an exception or a longjmp occurred or not. 0
49 /// means nothing occurred, 1 means an exception occurred, and other numbers
50 /// mean a longjmp occurred. In the case of longjmp, __THREW__ variable
51 /// indicates the corresponding setjmp buffer the longjmp corresponds to.
52 /// __threwValue is 0 for exceptions, and the argument to longjmp in case of
55 /// * Exception handling
57 /// 2) We assume the existence of setThrew and setTempRet0/getTempRet0 functions
58 /// at link time. setThrew exists in Emscripten's compiler-rt:
60 /// void setThrew(uintptr_t threw, int value) {
61 /// if (__THREW__ == 0) {
62 /// __THREW__ = threw;
63 /// __threwValue = value;
67 /// setTempRet0 is called from __cxa_find_matching_catch() in JS glue code.
68 /// In exception handling, getTempRet0 indicates the type of an exception
69 /// caught, and in setjmp/longjmp, it means the second argument to longjmp
73 /// invoke @func(arg1, arg2) to label %invoke.cont unwind label %lpad
76 /// call @__invoke_SIG(func, arg1, arg2)
77 /// %__THREW__.val = __THREW__;
79 /// if (%__THREW__.val == 1)
83 /// SIG is a mangled string generated based on the LLVM IR-level function
84 /// signature. After LLVM IR types are lowered to the target wasm types,
85 /// the names for these wrappers will change based on wasm types as well,
86 /// as in invoke_vi (function takes an int and returns void). The bodies of
87 /// these wrappers will be generated in JS glue code, and inside those
88 /// wrappers we use JS try-catch to generate actual exception effects. It
89 /// also calls the original callee function. An example wrapper in JS code
90 /// would look like this:
91 /// function invoke_vi(index,a1) {
93 /// Module["dynCall_vi"](index,a1); // This calls original callee
95 /// if (typeof e !== 'number' && e !== 'longjmp') throw e;
96 /// _setThrew(1, 0); // setThrew is called here
99 /// If an exception is thrown, __THREW__ will be set to true in a wrapper,
100 /// so we can jump to the right BB based on this value.
103 /// %val = landingpad catch c1 catch c2 catch c3 ...
106 /// %fmc = call @__cxa_find_matching_catch_N(c1, c2, c3, ...)
107 /// %val = {%fmc, getTempRet0()}
109 /// Here N is a number calculated based on the number of clauses.
110 /// setTempRet0 is called from __cxa_find_matching_catch() in JS glue code.
115 /// call @__resumeException(%a)
116 /// where __resumeException() is a function in JS glue code.
119 /// call @llvm.eh.typeid.for(type) (intrinsic)
121 /// call @llvm_eh_typeid_for(type)
122 /// llvm_eh_typeid_for function will be generated in JS glue code.
124 /// * Setjmp / Longjmp handling
126 /// In case calls to longjmp() exist
129 /// longjmp(buf, value)
131 /// emscripten_longjmp(buf, value)
133 /// In case calls to setjmp() exist
135 /// 2) In the function entry that calls setjmp, initialize setjmpTable and
136 /// setjmpTableSize as follows:
137 /// setjmpTableSize = 4;
138 /// setjmpTable = (int *) malloc(40);
139 /// setjmpTable[0] = 0;
140 /// setjmpTable and setjmpTableSize are used to call saveSetjmp() function in
141 /// Emscripten compiler-rt.
146 /// setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize);
147 /// setjmpTableSize = getTempRet0();
148 /// For each dynamic setjmp call, setjmpTable stores its ID (a number which
149 /// is incrementally assigned from 0) and its label (a unique number that
150 /// represents each callsite of setjmp). When we need more entries in
151 /// setjmpTable, it is reallocated in saveSetjmp() in Emscripten's
152 /// compiler-rt and it will return the new table address, and assign the new
153 /// table size in setTempRet0(). saveSetjmp also stores the setjmp's ID into
154 /// the buffer buf. A BB with setjmp is split into two after setjmp call in
155 /// order to make the post-setjmp BB the possible destination of longjmp BB.
158 /// 4) Lower every call that might longjmp into
160 /// call @__invoke_SIG(func, arg1, arg2)
161 /// %__THREW__.val = __THREW__;
163 /// %__threwValue.val = __threwValue;
164 /// if (%__THREW__.val != 0 & %__threwValue.val != 0) {
165 /// %label = testSetjmp(mem[%__THREW__.val], setjmpTable,
166 /// setjmpTableSize);
168 /// emscripten_longjmp(%__THREW__.val, %__threwValue.val);
169 /// setTempRet0(%__threwValue.val);
173 /// longjmp_result = getTempRet0();
175 /// label 1: goto post-setjmp BB 1
176 /// label 2: goto post-setjmp BB 2
178 /// default: goto split next BB
180 /// testSetjmp examines setjmpTable to see if there is a matching setjmp
181 /// call. After calling an invoke wrapper, if a longjmp occurred, __THREW__
182 /// will be the address of matching jmp_buf buffer and __threwValue be the
183 /// second argument to longjmp. mem[%__THREW__.val] is a setjmp ID that is
184 /// stored in saveSetjmp. testSetjmp returns a setjmp label, a unique ID to
185 /// each setjmp callsite. Label 0 means this longjmp buffer does not
186 /// correspond to one of the setjmp callsites in this function, so in this
187 /// case we just chain the longjmp to the caller. Label -1 means no longjmp
188 /// occurred. Otherwise we jump to the right post-setjmp BB based on the
191 ///===----------------------------------------------------------------------===//
193 #include "WebAssembly.h"
194 #include "WebAssemblyTargetMachine.h"
195 #include "llvm/ADT/StringExtras.h"
196 #include "llvm/CodeGen/TargetPassConfig.h"
197 #include "llvm/IR/DebugInfoMetadata.h"
198 #include "llvm/IR/Dominators.h"
199 #include "llvm/IR/IRBuilder.h"
200 #include "llvm/Support/CommandLine.h"
201 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
202 #include "llvm/Transforms/Utils/SSAUpdater.h"
204 using namespace llvm
;
206 #define DEBUG_TYPE "wasm-lower-em-ehsjlj"
208 static cl::list
<std::string
>
209 EHAllowlist("emscripten-cxx-exceptions-allowed",
210 cl::desc("The list of function names in which Emscripten-style "
211 "exception handling is enabled (see emscripten "
212 "EMSCRIPTEN_CATCHING_ALLOWED options)"),
216 class WebAssemblyLowerEmscriptenEHSjLj final
: public ModulePass
{
217 bool EnableEmEH
; // Enable Emscripten exception handling
218 bool EnableEmSjLj
; // Enable Emscripten setjmp/longjmp handling
219 bool DoSjLj
; // Whether we actually perform setjmp/longjmp handling
221 GlobalVariable
*ThrewGV
= nullptr; // __THREW__ (Emscripten)
222 GlobalVariable
*ThrewValueGV
= nullptr; // __threwValue (Emscripten)
223 Function
*GetTempRet0F
= nullptr; // getTempRet0() (Emscripten)
224 Function
*SetTempRet0F
= nullptr; // setTempRet0() (Emscripten)
225 Function
*ResumeF
= nullptr; // __resumeException() (Emscripten)
226 Function
*EHTypeIDF
= nullptr; // llvm.eh.typeid.for() (intrinsic)
227 Function
*EmLongjmpF
= nullptr; // emscripten_longjmp() (Emscripten)
228 Function
*SaveSetjmpF
= nullptr; // saveSetjmp() (Emscripten)
229 Function
*TestSetjmpF
= nullptr; // testSetjmp() (Emscripten)
231 // __cxa_find_matching_catch_N functions.
232 // Indexed by the number of clauses in an original landingpad instruction.
233 DenseMap
<int, Function
*> FindMatchingCatches
;
234 // Map of <function signature string, invoke_ wrappers>
235 StringMap
<Function
*> InvokeWrappers
;
236 // Set of allowed function names for exception handling
237 std::set
<std::string
> EHAllowlistSet
;
238 // Functions that contain calls to setjmp
239 SmallPtrSet
<Function
*, 8> SetjmpUsers
;
241 StringRef
getPassName() const override
{
242 return "WebAssembly Lower Emscripten Exceptions";
245 bool runEHOnFunction(Function
&F
);
246 bool runSjLjOnFunction(Function
&F
);
247 Function
*getFindMatchingCatch(Module
&M
, unsigned NumClauses
);
249 Value
*wrapInvoke(CallBase
*CI
);
250 void wrapTestSetjmp(BasicBlock
*BB
, DebugLoc DL
, Value
*Threw
,
251 Value
*SetjmpTable
, Value
*SetjmpTableSize
, Value
*&Label
,
252 Value
*&LongjmpResult
, BasicBlock
*&EndBB
);
253 Function
*getInvokeWrapper(CallBase
*CI
);
// Returns true when EHAllowlistSet is empty, i.e. no per-function allowlist
// was supplied; supportsException() then treats every function as allowed
// (subject to EnableEmEH).
255 bool areAllExceptionsAllowed() const { return EHAllowlistSet
.empty(); }
256 bool supportsException(const Function
*F
) const {
257 return EnableEmEH
&& (areAllExceptionsAllowed() ||
258 EHAllowlistSet
.count(std::string(F
->getName())));
261 void rebuildSSA(Function
&F
);
266 WebAssemblyLowerEmscriptenEHSjLj(bool EnableEmEH
= true,
267 bool EnableEmSjLj
= true)
268 : ModulePass(ID
), EnableEmEH(EnableEmEH
), EnableEmSjLj(EnableEmSjLj
) {
269 EHAllowlistSet
.insert(EHAllowlist
.begin(), EHAllowlist
.end());
271 bool runOnModule(Module
&M
) override
;
273 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
274 AU
.addRequired
<DominatorTreeWrapperPass
>();
277 } // End anonymous namespace
279 char WebAssemblyLowerEmscriptenEHSjLj::ID
= 0;
280 INITIALIZE_PASS(WebAssemblyLowerEmscriptenEHSjLj
, DEBUG_TYPE
,
281 "WebAssembly Lower Emscripten Exceptions / Setjmp / Longjmp",
284 ModulePass
*llvm::createWebAssemblyLowerEmscriptenEHSjLj(bool EnableEmEH
,
286 return new WebAssemblyLowerEmscriptenEHSjLj(EnableEmEH
, EnableEmSjLj
);
289 static bool canThrow(const Value
*V
) {
290 if (const auto *F
= dyn_cast
<const Function
>(V
)) {
291 // Intrinsics cannot throw
292 if (F
->isIntrinsic())
294 StringRef Name
= F
->getName();
295 // leave setjmp and longjmp (mostly) alone, we process them properly later
296 if (Name
== "setjmp" || Name
== "longjmp" || Name
== "emscripten_longjmp")
298 return !F
->doesNotThrow();
300 // not a function, so an indirect call - can throw, we can't tell
304 // Get a global variable with the given name. If it doesn't exist declare it,
305 // which will generate an import and assume that it will exist at link time.
306 static GlobalVariable
*getGlobalVariable(Module
&M
, Type
*Ty
,
307 WebAssemblyTargetMachine
&TM
,
309 auto *GV
= dyn_cast
<GlobalVariable
>(M
.getOrInsertGlobal(Name
, Ty
));
311 report_fatal_error(Twine("unable to create global: ") + Name
);
313 // If the target supports TLS, make this variable thread-local. We can't just
314 // unconditionally make it thread-local and depend on
315 // CoalesceFeaturesAndStripAtomics to downgrade it, because stripping TLS has
316 // the side effect of disallowing the object from being linked into a
317 // shared-memory module, which we don't want to be responsible for.
318 auto *Subtarget
= TM
.getSubtargetImpl();
319 auto TLS
= Subtarget
->hasAtomics() && Subtarget
->hasBulkMemory()
320 ? GlobalValue::LocalExecTLSModel
321 : GlobalValue::NotThreadLocal
;
322 GV
->setThreadLocalMode(TLS
);
326 // Simple function name mangler.
327 // This function simply takes LLVM's string representation of parameter types
328 // and concatenates them with '_'. There are non-alphanumeric characters but llc
329 // is ok with it, and we need to postprocess these names after the lowering
331 static std::string
getSignature(FunctionType
*FTy
) {
333 raw_string_ostream
OS(Sig
);
334 OS
<< *FTy
->getReturnType();
335 for (Type
*ParamTy
: FTy
->params())
336 OS
<< "_" << *ParamTy
;
340 erase_if(Sig
, isSpace
);
341 // When s2wasm parses .s file, a comma means the end of an argument. So a
342 // mangled function name can contain any character but a comma.
343 std::replace(Sig
.begin(), Sig
.end(), ',', '.');
347 static Function
*getEmscriptenFunction(FunctionType
*Ty
, const Twine
&Name
,
349 Function
* F
= Function::Create(Ty
, GlobalValue::ExternalLinkage
, Name
, M
);
350 // Tell the linker that this function is expected to be imported from the
352 if (!F
->hasFnAttribute("wasm-import-module")) {
354 B
.addAttribute("wasm-import-module", "env");
357 if (!F
->hasFnAttribute("wasm-import-name")) {
359 B
.addAttribute("wasm-import-name", F
->getName());
365 // Returns an integer type for the target architecture's address space.
366 // i32 for wasm32 and i64 for wasm64.
367 static Type
*getAddrIntType(Module
*M
) {
368 IRBuilder
<> IRB(M
->getContext());
369 return IRB
.getIntNTy(M
->getDataLayout().getPointerSizeInBits());
372 // Returns an integer pointer type for the target architecture's address space.
373 // i32* for wasm32 and i64* for wasm64.
374 static Type
*getAddrPtrType(Module
*M
) {
375 return Type::getIntNPtrTy(M
->getContext(),
376 M
->getDataLayout().getPointerSizeInBits());
379 // Returns an integer whose type is the integer type for the target's address
380 // space. Returns (i32 C) for wasm32 and (i64 C) for wasm64, when C is the
382 static Value
*getAddrSizeInt(Module
*M
, uint64_t C
) {
383 IRBuilder
<> IRB(M
->getContext());
384 return IRB
.getIntN(M
->getDataLayout().getPointerSizeInBits(), C
);
387 // Returns __cxa_find_matching_catch_N function, where N = NumClauses + 2.
388 // This is because a landingpad instruction contains two more arguments, a
389 // personality function and a cleanup bit, and __cxa_find_matching_catch_N
390 // functions are named after the number of arguments in the original landingpad
393 WebAssemblyLowerEmscriptenEHSjLj::getFindMatchingCatch(Module
&M
,
394 unsigned NumClauses
) {
395 if (FindMatchingCatches
.count(NumClauses
))
396 return FindMatchingCatches
[NumClauses
];
397 PointerType
*Int8PtrTy
= Type::getInt8PtrTy(M
.getContext());
398 SmallVector
<Type
*, 16> Args(NumClauses
, Int8PtrTy
);
399 FunctionType
*FTy
= FunctionType::get(Int8PtrTy
, Args
, false);
400 Function
*F
= getEmscriptenFunction(
401 FTy
, "__cxa_find_matching_catch_" + Twine(NumClauses
+ 2), &M
);
402 FindMatchingCatches
[NumClauses
] = F
;
406 // Generate invoke wrapper sequence with preamble and postamble
410 // %__THREW__.val = __THREW__; __THREW__ = 0;
411 // Returns %__THREW__.val, which indicates whether an exception is thrown (or
412 // whether longjmp occurred), for future use.
413 Value
*WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase
*CI
) {
414 Module
*M
= CI
->getModule();
415 LLVMContext
&C
= M
->getContext();
417 // If we are calling a function that is noreturn, we must remove that
418 // attribute. The code we insert here does expect it to return, after we
419 // catch the exception.
420 if (CI
->doesNotReturn()) {
421 if (auto *F
= CI
->getCalledFunction())
422 F
->removeFnAttr(Attribute::NoReturn
);
423 CI
->removeFnAttr(Attribute::NoReturn
);
427 IRB
.SetInsertPoint(CI
);
431 IRB
.CreateStore(getAddrSizeInt(M
, 0), ThrewGV
);
433 // Invoke function wrapper in JavaScript
434 SmallVector
<Value
*, 16> Args
;
435 // Put the pointer to the callee as first argument, so it can be called
436 // within the invoke wrapper later
437 Args
.push_back(CI
->getCalledOperand());
438 Args
.append(CI
->arg_begin(), CI
->arg_end());
439 CallInst
*NewCall
= IRB
.CreateCall(getInvokeWrapper(CI
), Args
);
440 NewCall
->takeName(CI
);
441 NewCall
->setCallingConv(CallingConv::WASM_EmscriptenInvoke
);
442 NewCall
->setDebugLoc(CI
->getDebugLoc());
444 // Because we added the pointer to the callee as first argument, all
445 // argument attribute indices have to be incremented by one.
446 SmallVector
<AttributeSet
, 8> ArgAttributes
;
447 const AttributeList
&InvokeAL
= CI
->getAttributes();
449 // No attributes for the callee pointer.
450 ArgAttributes
.push_back(AttributeSet());
451 // Copy the argument attributes from the original
452 for (unsigned I
= 0, E
= CI
->getNumArgOperands(); I
< E
; ++I
)
453 ArgAttributes
.push_back(InvokeAL
.getParamAttrs(I
));
455 AttrBuilder
FnAttrs(InvokeAL
.getFnAttrs());
456 if (FnAttrs
.contains(Attribute::AllocSize
)) {
457 // The allocsize attribute (if any) refers to parameters by index and needs
460 Optional
<unsigned> NEltArg
;
461 std::tie(SizeArg
, NEltArg
) = FnAttrs
.getAllocSizeArgs();
463 if (NEltArg
.hasValue())
464 NEltArg
= NEltArg
.getValue() + 1;
465 FnAttrs
.addAllocSizeAttr(SizeArg
, NEltArg
);
468 // Reconstruct the AttributesList based on the vector we constructed.
469 AttributeList NewCallAL
= AttributeList::get(
470 C
, AttributeSet::get(C
, FnAttrs
), InvokeAL
.getRetAttrs(), ArgAttributes
);
471 NewCall
->setAttributes(NewCallAL
);
473 CI
->replaceAllUsesWith(NewCall
);
476 // %__THREW__.val = __THREW__; __THREW__ = 0;
478 IRB
.CreateLoad(getAddrIntType(M
), ThrewGV
, ThrewGV
->getName() + ".val");
479 IRB
.CreateStore(getAddrSizeInt(M
, 0), ThrewGV
);
483 // Get matching invoke wrapper based on callee signature
484 Function
*WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallBase
*CI
) {
485 Module
*M
= CI
->getModule();
486 SmallVector
<Type
*, 16> ArgTys
;
487 FunctionType
*CalleeFTy
= CI
->getFunctionType();
489 std::string Sig
= getSignature(CalleeFTy
);
490 if (InvokeWrappers
.find(Sig
) != InvokeWrappers
.end())
491 return InvokeWrappers
[Sig
];
493 // Put the pointer to the callee as first argument
494 ArgTys
.push_back(PointerType::getUnqual(CalleeFTy
));
495 // Add argument types
496 ArgTys
.append(CalleeFTy
->param_begin(), CalleeFTy
->param_end());
498 FunctionType
*FTy
= FunctionType::get(CalleeFTy
->getReturnType(), ArgTys
,
499 CalleeFTy
->isVarArg());
500 Function
*F
= getEmscriptenFunction(FTy
, "__invoke_" + Sig
, M
);
501 InvokeWrappers
[Sig
] = F
;
505 static bool canLongjmp(const Value
*Callee
) {
506 if (auto *CalleeF
= dyn_cast
<Function
>(Callee
))
507 if (CalleeF
->isIntrinsic())
510 // Attempting to transform inline assembly will result in something like:
511 // call void @__invoke_void(void ()* asm ...)
512 // which is invalid because inline assembly blocks do not have addresses
513 // and can't be passed by pointer. The result is a crash with illegal IR.
514 if (isa
<InlineAsm
>(Callee
))
516 StringRef CalleeName
= Callee
->getName();
518 // The reason we include malloc/free here is to exclude the malloc/free
519 // calls generated in setjmp prep / cleanup routines.
520 if (CalleeName
== "setjmp" || CalleeName
== "malloc" || CalleeName
== "free")
523 // These are functions in Emscripten's JS glue code or compiler-rt
524 if (CalleeName
== "__resumeException" || CalleeName
== "llvm_eh_typeid_for" ||
525 CalleeName
== "saveSetjmp" || CalleeName
== "testSetjmp" ||
526 CalleeName
== "getTempRet0" || CalleeName
== "setTempRet0")
529 // __cxa_find_matching_catch_N functions cannot longjmp
530 if (Callee
->getName().startswith("__cxa_find_matching_catch_"))
533 // Exception-catching related functions
534 if (CalleeName
== "__cxa_begin_catch" || CalleeName
== "__cxa_end_catch" ||
535 CalleeName
== "__cxa_allocate_exception" || CalleeName
== "__cxa_throw" ||
536 CalleeName
== "__clang_call_terminate")
539 // Otherwise we don't know
543 static bool isEmAsmCall(const Value
*Callee
) {
544 StringRef CalleeName
= Callee
->getName();
545 // This is an exhaustive list from Emscripten's <emscripten/em_asm.h>.
546 return CalleeName
== "emscripten_asm_const_int" ||
547 CalleeName
== "emscripten_asm_const_double" ||
548 CalleeName
== "emscripten_asm_const_int_sync_on_main_thread" ||
549 CalleeName
== "emscripten_asm_const_double_sync_on_main_thread" ||
550 CalleeName
== "emscripten_asm_const_async_on_main_thread";
553 // Generate testSetjmp function call sequence with preamble and postamble.
554 // The code this generates is equivalent to the following JavaScript code:
555 // %__threwValue.val = __threwValue;
556 // if (%__THREW__.val != 0 & %__threwValue.val != 0) {
557 // %label = testSetjmp(mem[%__THREW__.val], setjmpTable, setjmpTableSize);
559 // emscripten_longjmp(%__THREW__.val, %__threwValue.val);
560 // setTempRet0(%__threwValue.val);
564 // %longjmp_result = getTempRet0();
566 // As output parameters, returns %label, %longjmp_result, and the BB the last
567 // instruction (%longjmp_result = ...) is in.
568 void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
569 BasicBlock
*BB
, DebugLoc DL
, Value
*Threw
, Value
*SetjmpTable
,
570 Value
*SetjmpTableSize
, Value
*&Label
, Value
*&LongjmpResult
,
571 BasicBlock
*&EndBB
) {
572 Function
*F
= BB
->getParent();
573 Module
*M
= F
->getParent();
574 LLVMContext
&C
= M
->getContext();
576 IRB
.SetCurrentDebugLocation(DL
);
578 // if (%__THREW__.val != 0 & %__threwValue.val != 0)
579 IRB
.SetInsertPoint(BB
);
580 BasicBlock
*ThenBB1
= BasicBlock::Create(C
, "if.then1", F
);
581 BasicBlock
*ElseBB1
= BasicBlock::Create(C
, "if.else1", F
);
582 BasicBlock
*EndBB1
= BasicBlock::Create(C
, "if.end", F
);
583 Value
*ThrewCmp
= IRB
.CreateICmpNE(Threw
, getAddrSizeInt(M
, 0));
584 Value
*ThrewValue
= IRB
.CreateLoad(IRB
.getInt32Ty(), ThrewValueGV
,
585 ThrewValueGV
->getName() + ".val");
586 Value
*ThrewValueCmp
= IRB
.CreateICmpNE(ThrewValue
, IRB
.getInt32(0));
587 Value
*Cmp1
= IRB
.CreateAnd(ThrewCmp
, ThrewValueCmp
, "cmp1");
588 IRB
.CreateCondBr(Cmp1
, ThenBB1
, ElseBB1
);
590 // %label = testSetjmp(mem[%__THREW__.val], setjmpTable, setjmpTableSize);
592 IRB
.SetInsertPoint(ThenBB1
);
593 BasicBlock
*ThenBB2
= BasicBlock::Create(C
, "if.then2", F
);
594 BasicBlock
*EndBB2
= BasicBlock::Create(C
, "if.end2", F
);
596 IRB
.CreateIntToPtr(Threw
, getAddrPtrType(M
), Threw
->getName() + ".p");
597 Value
*LoadedThrew
= IRB
.CreateLoad(getAddrIntType(M
), ThrewPtr
,
598 ThrewPtr
->getName() + ".loaded");
599 Value
*ThenLabel
= IRB
.CreateCall(
600 TestSetjmpF
, {LoadedThrew
, SetjmpTable
, SetjmpTableSize
}, "label");
601 Value
*Cmp2
= IRB
.CreateICmpEQ(ThenLabel
, IRB
.getInt32(0));
602 IRB
.CreateCondBr(Cmp2
, ThenBB2
, EndBB2
);
604 // emscripten_longjmp(%__THREW__.val, %__threwValue.val);
605 IRB
.SetInsertPoint(ThenBB2
);
606 IRB
.CreateCall(EmLongjmpF
, {Threw
, ThrewValue
});
607 IRB
.CreateUnreachable();
609 // setTempRet0(%__threwValue.val);
610 IRB
.SetInsertPoint(EndBB2
);
611 IRB
.CreateCall(SetTempRet0F
, ThrewValue
);
612 IRB
.CreateBr(EndBB1
);
614 IRB
.SetInsertPoint(ElseBB1
);
615 IRB
.CreateBr(EndBB1
);
617 // longjmp_result = getTempRet0();
618 IRB
.SetInsertPoint(EndBB1
);
619 PHINode
*LabelPHI
= IRB
.CreatePHI(IRB
.getInt32Ty(), 2, "label");
620 LabelPHI
->addIncoming(ThenLabel
, EndBB2
);
622 LabelPHI
->addIncoming(IRB
.getInt32(-1), ElseBB1
);
624 // Output parameter assignment
627 LongjmpResult
= IRB
.CreateCall(GetTempRet0F
, None
, "longjmp_result");
630 void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function
&F
) {
631 DominatorTree
&DT
= getAnalysis
<DominatorTreeWrapperPass
>(F
).getDomTree();
632 DT
.recalculate(F
); // CFG has been changed
634 for (BasicBlock
&BB
: F
) {
635 for (Instruction
&I
: BB
) {
636 SSA
.Initialize(I
.getType(), I
.getName());
637 SSA
.AddAvailableValue(&BB
, &I
);
638 for (auto UI
= I
.use_begin(), UE
= I
.use_end(); UI
!= UE
;) {
641 auto *User
= cast
<Instruction
>(U
.getUser());
642 if (auto *UserPN
= dyn_cast
<PHINode
>(User
))
643 if (UserPN
->getIncomingBlock(U
) == &BB
)
646 if (DT
.dominates(&I
, User
))
648 SSA
.RewriteUseAfterInsertions(U
);
654 // Replace uses of longjmp with emscripten_longjmp. emscripten_longjmp takes
655 // arguments of type {i32, i32} (wasm32) / {i64, i32} (wasm64) and longjmp takes
656 // {jmp_buf*, i32}, so we need a ptrtoint instruction here to make the type
657 // match. jmp_buf* will eventually be lowered to i32/i64 in the wasm backend.
658 static void replaceLongjmpWithEmscriptenLongjmp(Function
*LongjmpF
,
659 Function
*EmLongjmpF
) {
660 Module
*M
= LongjmpF
->getParent();
661 SmallVector
<CallInst
*, 8> ToErase
;
662 LLVMContext
&C
= LongjmpF
->getParent()->getContext();
665 // For calls to longjmp, replace it with emscripten_longjmp and cast its first
666 // argument (jmp_buf*) to int
667 for (User
*U
: LongjmpF
->users()) {
668 auto *CI
= dyn_cast
<CallInst
>(U
);
669 if (CI
&& CI
->getCalledFunction() == LongjmpF
) {
670 IRB
.SetInsertPoint(CI
);
672 IRB
.CreatePtrToInt(CI
->getArgOperand(0), getAddrIntType(M
), "jmpbuf");
673 IRB
.CreateCall(EmLongjmpF
, {JmpBuf
, CI
->getArgOperand(1)});
674 ToErase
.push_back(CI
);
677 for (auto *I
: ToErase
)
678 I
->eraseFromParent();
680 // If we have any remaining uses of longjmp's function pointer, replace it
681 // with (int(*)(jmp_buf*, int))emscripten_longjmp.
682 if (!LongjmpF
->uses().empty()) {
684 IRB
.CreateBitCast(EmLongjmpF
, LongjmpF
->getType(), "em_longjmp");
685 LongjmpF
->replaceAllUsesWith(EmLongjmp
);
689 static bool containsLongjmpableCalls(const Function
*F
) {
690 for (const auto &BB
: *F
)
691 for (const auto &I
: BB
)
692 if (const auto *CB
= dyn_cast
<CallBase
>(&I
))
693 if (canLongjmp(CB
->getCalledOperand()))
698 bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module
&M
) {
699 LLVM_DEBUG(dbgs() << "********** Lower Emscripten EH & SjLj **********\n");
701 LLVMContext
&C
= M
.getContext();
704 Function
*SetjmpF
= M
.getFunction("setjmp");
705 Function
*LongjmpF
= M
.getFunction("longjmp");
707 auto *TPC
= getAnalysisIfAvailable
<TargetPassConfig
>();
708 assert(TPC
&& "Expected a TargetPassConfig");
709 auto &TM
= TPC
->getTM
<WebAssemblyTargetMachine
>();
711 if (EnableEmEH
&& TM
.Options
.ExceptionModel
== ExceptionHandling::Wasm
)
712 report_fatal_error("-exception-model=wasm not allowed with "
713 "-enable-emscripten-cxx-exceptions");
715 // Declare (or get) global variables __THREW__, __threwValue, and
716 // getTempRet0/setTempRet0 function which are used in common for both
717 // exception handling and setjmp/longjmp handling
718 ThrewGV
= getGlobalVariable(M
, getAddrIntType(&M
), TM
, "__THREW__");
719 ThrewValueGV
= getGlobalVariable(M
, IRB
.getInt32Ty(), TM
, "__threwValue");
720 GetTempRet0F
= getEmscriptenFunction(
721 FunctionType::get(IRB
.getInt32Ty(), false), "getTempRet0", &M
);
722 SetTempRet0F
= getEmscriptenFunction(
723 FunctionType::get(IRB
.getVoidTy(), IRB
.getInt32Ty(), false),
725 GetTempRet0F
->setDoesNotThrow();
726 SetTempRet0F
->setDoesNotThrow();
728 bool Changed
= false;
730 // Function registration for exception handling
732 // Register __resumeException function
733 FunctionType
*ResumeFTy
=
734 FunctionType::get(IRB
.getVoidTy(), IRB
.getInt8PtrTy(), false);
735 ResumeF
= getEmscriptenFunction(ResumeFTy
, "__resumeException", &M
);
737 // Register llvm_eh_typeid_for function
738 FunctionType
*EHTypeIDTy
=
739 FunctionType::get(IRB
.getInt32Ty(), IRB
.getInt8PtrTy(), false);
740 EHTypeIDF
= getEmscriptenFunction(EHTypeIDTy
, "llvm_eh_typeid_for", &M
);
743 if (EnableEmSjLj
&& SetjmpF
) {
744 // Precompute setjmp users
745 for (User
*U
: SetjmpF
->users()) {
746 Function
*UserF
= cast
<Instruction
>(U
)->getFunction();
747 // If a function that calls setjmp does not contain any other calls that
748 // can longjmp, we don't need to do any transformation on that function,
750 if (containsLongjmpableCalls(UserF
))
751 SetjmpUsers
.insert(UserF
);
755 bool SetjmpUsed
= SetjmpF
&& !SetjmpUsers
.empty();
756 bool LongjmpUsed
= LongjmpF
&& !LongjmpF
->use_empty();
757 DoSjLj
= EnableEmSjLj
&& (SetjmpUsed
|| LongjmpUsed
);
759 // Function registration and data pre-gathering for setjmp/longjmp handling
761 // Register emscripten_longjmp function
762 FunctionType
*FTy
= FunctionType::get(
763 IRB
.getVoidTy(), {getAddrIntType(&M
), IRB
.getInt32Ty()}, false);
764 EmLongjmpF
= getEmscriptenFunction(FTy
, "emscripten_longjmp", &M
);
767 // Register saveSetjmp function
768 FunctionType
*SetjmpFTy
= SetjmpF
->getFunctionType();
769 FTy
= FunctionType::get(Type::getInt32PtrTy(C
),
770 {SetjmpFTy
->getParamType(0), IRB
.getInt32Ty(),
771 Type::getInt32PtrTy(C
), IRB
.getInt32Ty()},
773 SaveSetjmpF
= getEmscriptenFunction(FTy
, "saveSetjmp", &M
);
775 // Register testSetjmp function
776 FTy
= FunctionType::get(
778 {getAddrIntType(&M
), Type::getInt32PtrTy(C
), IRB
.getInt32Ty()},
780 TestSetjmpF
= getEmscriptenFunction(FTy
, "testSetjmp", &M
);
784 // Exception handling transformation
786 for (Function
&F
: M
) {
787 if (F
.isDeclaration())
789 Changed
|= runEHOnFunction(F
);
793 // Setjmp/longjmp handling transformation
795 Changed
= true; // We have setjmp or longjmp somewhere
797 replaceLongjmpWithEmscriptenLongjmp(LongjmpF
, EmLongjmpF
);
798 // Only traverse functions that use setjmp in order not to insert
799 // unnecessary prep / cleanup code in every function
801 for (Function
*F
: SetjmpUsers
)
802 runSjLjOnFunction(*F
);
806 // Delete unused global variables and functions
808 ResumeF
->eraseFromParent();
810 EHTypeIDF
->eraseFromParent();
812 EmLongjmpF
->eraseFromParent();
814 SaveSetjmpF
->eraseFromParent();
816 TestSetjmpF
->eraseFromParent();
823 bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function
&F
) {
824 Module
&M
= *F
.getParent();
825 LLVMContext
&C
= F
.getContext();
827 bool Changed
= false;
828 SmallVector
<Instruction
*, 64> ToErase
;
829 SmallPtrSet
<LandingPadInst
*, 32> LandingPads
;
831 for (BasicBlock
&BB
: F
) {
832 auto *II
= dyn_cast
<InvokeInst
>(BB
.getTerminator());
836 LandingPads
.insert(II
->getLandingPadInst());
837 IRB
.SetInsertPoint(II
);
839 const Value
*Callee
= II
->getCalledOperand();
840 bool NeedInvoke
= supportsException(&F
) && canThrow(Callee
);
842 // Wrap invoke with invoke wrapper and generate preamble/postamble
843 Value
*Threw
= wrapInvoke(II
);
844 ToErase
.push_back(II
);
846 // If setjmp/longjmp handling is enabled, the thrown value can be not an
847 // exception but a longjmp. If the current function contains calls to
848 // setjmp, it will be appropriately handled in runSjLjOnFunction. But even
849 // if the function does not contain setjmp calls, we shouldn't silently
850 // ignore longjmps; we should rethrow them so they can be correctly
851 // handled somewhere up the call chain where setjmp is. __THREW__'s
852 // value is 0 when nothing happened, 1 when an exception is thrown, and
853 // other values when longjmp is thrown.
855 // if (%__THREW__.val == 0 || %__THREW__.val == 1)
858 // goto %longjmp.rethrow
860 // longjmp.rethrow: ;; This is longjmp. Rethrow it
861 // %__threwValue.val = __threwValue
862 // emscripten_longjmp(%__THREW__.val, %__threwValue.val);
864 // tail: ;; Nothing happened or an exception is thrown
865 // ... Continue exception handling ...
866 if (DoSjLj
&& !SetjmpUsers
.count(&F
) && canLongjmp(Callee
)) {
867 BasicBlock
*Tail
= BasicBlock::Create(C
, "tail", &F
);
868 BasicBlock
*RethrowBB
= BasicBlock::Create(C
, "longjmp.rethrow", &F
);
870 IRB
.CreateICmpEQ(Threw
, getAddrSizeInt(&M
, 1), "cmp.eq.one");
872 IRB
.CreateICmpEQ(Threw
, getAddrSizeInt(&M
, 0), "cmp.eq.zero");
873 Value
*Or
= IRB
.CreateOr(CmpEqZero
, CmpEqOne
, "or");
874 IRB
.CreateCondBr(Or
, Tail
, RethrowBB
);
875 IRB
.SetInsertPoint(RethrowBB
);
876 Value
*ThrewValue
= IRB
.CreateLoad(IRB
.getInt32Ty(), ThrewValueGV
,
877 ThrewValueGV
->getName() + ".val");
878 IRB
.CreateCall(EmLongjmpF
, {Threw
, ThrewValue
});
880 IRB
.CreateUnreachable();
881 IRB
.SetInsertPoint(Tail
);
884 // Insert a branch based on __THREW__ variable
885 Value
*Cmp
= IRB
.CreateICmpEQ(Threw
, getAddrSizeInt(&M
, 1), "cmp");
886 IRB
.CreateCondBr(Cmp
, II
->getUnwindDest(), II
->getNormalDest());
889 // This can't throw, and we don't need this invoke, just replace it with a
891 SmallVector
<Value
*, 16> Args(II
->args());
893 IRB
.CreateCall(II
->getFunctionType(), II
->getCalledOperand(), Args
);
894 NewCall
->takeName(II
);
895 NewCall
->setCallingConv(II
->getCallingConv());
896 NewCall
->setDebugLoc(II
->getDebugLoc());
897 NewCall
->setAttributes(II
->getAttributes());
898 II
->replaceAllUsesWith(NewCall
);
899 ToErase
.push_back(II
);
901 IRB
.CreateBr(II
->getNormalDest());
903 // Remove any PHI node entries from the exception destination
904 II
->getUnwindDest()->removePredecessor(&BB
);
908 // Process resume instructions
909 for (BasicBlock
&BB
: F
) {
910 // Scan the body of the basic block for resumes
911 for (Instruction
&I
: BB
) {
912 auto *RI
= dyn_cast
<ResumeInst
>(&I
);
917 // Split the input into legal values
918 Value
*Input
= RI
->getValue();
919 IRB
.SetInsertPoint(RI
);
920 Value
*Low
= IRB
.CreateExtractValue(Input
, 0, "low");
921 // Create a call to __resumeException function
922 IRB
.CreateCall(ResumeF
, {Low
});
923 // Add a terminator to the block
924 IRB
.CreateUnreachable();
925 ToErase
.push_back(RI
);
929 // Process llvm.eh.typeid.for intrinsics
930 for (BasicBlock
&BB
: F
) {
931 for (Instruction
&I
: BB
) {
932 auto *CI
= dyn_cast
<CallInst
>(&I
);
935 const Function
*Callee
= CI
->getCalledFunction();
938 if (Callee
->getIntrinsicID() != Intrinsic::eh_typeid_for
)
942 IRB
.SetInsertPoint(CI
);
944 IRB
.CreateCall(EHTypeIDF
, CI
->getArgOperand(0), "typeid");
945 CI
->replaceAllUsesWith(NewCI
);
946 ToErase
.push_back(CI
);
950 // Look for orphan landingpads, can occur in blocks with no predecessors
951 for (BasicBlock
&BB
: F
) {
952 Instruction
*I
= BB
.getFirstNonPHI();
953 if (auto *LPI
= dyn_cast
<LandingPadInst
>(I
))
954 LandingPads
.insert(LPI
);
956 Changed
|= !LandingPads
.empty();
958 // Handle all the landingpad for this function together, as multiple invokes
959 // may share a single lp
960 for (LandingPadInst
*LPI
: LandingPads
) {
961 IRB
.SetInsertPoint(LPI
);
962 SmallVector
<Value
*, 16> FMCArgs
;
963 for (unsigned I
= 0, E
= LPI
->getNumClauses(); I
< E
; ++I
) {
964 Constant
*Clause
= LPI
->getClause(I
);
965 // TODO Handle filters (= exception specifications).
966 // https://bugs.llvm.org/show_bug.cgi?id=50396
968 FMCArgs
.push_back(Clause
);
971 // Create a call to __cxa_find_matching_catch_N function
972 Function
*FMCF
= getFindMatchingCatch(M
, FMCArgs
.size());
973 CallInst
*FMCI
= IRB
.CreateCall(FMCF
, FMCArgs
, "fmc");
974 Value
*Undef
= UndefValue::get(LPI
->getType());
975 Value
*Pair0
= IRB
.CreateInsertValue(Undef
, FMCI
, 0, "pair0");
976 Value
*TempRet0
= IRB
.CreateCall(GetTempRet0F
, None
, "tempret0");
977 Value
*Pair1
= IRB
.CreateInsertValue(Pair0
, TempRet0
, 1, "pair1");
979 LPI
->replaceAllUsesWith(Pair1
);
980 ToErase
.push_back(LPI
);
983 // Erase everything we no longer need in this function
984 for (Instruction
*I
: ToErase
)
985 I
->eraseFromParent();
990 // This tries to get debug info from the instruction before which a new
991 // instruction will be inserted, and if there's no debug info in that
992 // instruction, tries to get the info instead from the previous instruction (if
993 // any). If none of these has debug info and a DISubprogram is provided, it
994 // creates a dummy debug info with the first line of the function, because IR
995 // verifier requires all inlinable callsites should have debug info when both a
996 // caller and callee have DISubprogram. If none of these conditions are met,
997 // returns empty info.
998 static DebugLoc
getOrCreateDebugLoc(const Instruction
*InsertBefore
,
1000 assert(InsertBefore
);
1001 if (InsertBefore
->getDebugLoc())
1002 return InsertBefore
->getDebugLoc();
1003 const Instruction
*Prev
= InsertBefore
->getPrevNode();
1004 if (Prev
&& Prev
->getDebugLoc())
1005 return Prev
->getDebugLoc();
1007 return DILocation::get(SP
->getContext(), SP
->getLine(), 1, SP
);
1011 bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function
&F
) {
1012 Module
&M
= *F
.getParent();
1013 LLVMContext
&C
= F
.getContext();
1015 SmallVector
<Instruction
*, 64> ToErase
;
1016 // Vector of %setjmpTable values
1017 SmallVector
<Instruction
*, 4> SetjmpTableInsts
;
1018 // Vector of %setjmpTableSize values
1019 SmallVector
<Instruction
*, 4> SetjmpTableSizeInsts
;
1021 // Setjmp preparation
1023 // This instruction effectively means %setjmpTableSize = 4.
1024 // We create this as an instruction intentionally, and we don't want to fold
1025 // this instruction to a constant 4, because this value will be used in
1026 // SSAUpdater.AddAvailableValue(...) later.
1027 BasicBlock
*Entry
= &F
.getEntryBlock();
1028 DebugLoc FirstDL
= getOrCreateDebugLoc(&*Entry
->begin(), F
.getSubprogram());
1029 BinaryOperator
*SetjmpTableSize
=
1030 BinaryOperator::Create(Instruction::Add
, IRB
.getInt32(4), IRB
.getInt32(0),
1031 "setjmpTableSize", &*Entry
->getFirstInsertionPt());
1032 SetjmpTableSize
->setDebugLoc(FirstDL
);
1033 // setjmpTable = (int *) malloc(40);
1034 Instruction
*SetjmpTable
= CallInst::CreateMalloc(
1035 SetjmpTableSize
, IRB
.getInt32Ty(), IRB
.getInt32Ty(), IRB
.getInt32(40),
1036 nullptr, nullptr, "setjmpTable");
1037 SetjmpTable
->setDebugLoc(FirstDL
);
1038 // CallInst::CreateMalloc may return a bitcast instruction if the result types
1039 // mismatch. We need to set the debug loc for the original call too.
1040 auto *MallocCall
= SetjmpTable
->stripPointerCasts();
1041 if (auto *MallocCallI
= dyn_cast
<Instruction
>(MallocCall
)) {
1042 MallocCallI
->setDebugLoc(FirstDL
);
1044 // setjmpTable[0] = 0;
1045 IRB
.SetInsertPoint(SetjmpTableSize
);
1046 IRB
.CreateStore(IRB
.getInt32(0), SetjmpTable
);
1047 SetjmpTableInsts
.push_back(SetjmpTable
);
1048 SetjmpTableSizeInsts
.push_back(SetjmpTableSize
);
1050 // Setjmp transformation
1051 SmallVector
<PHINode
*, 4> SetjmpRetPHIs
;
1052 Function
*SetjmpF
= M
.getFunction("setjmp");
1053 for (User
*U
: SetjmpF
->users()) {
1054 auto *CI
= dyn_cast
<CallInst
>(U
);
1056 report_fatal_error("Does not support indirect calls to setjmp");
1058 BasicBlock
*BB
= CI
->getParent();
1059 if (BB
->getParent() != &F
) // in other function
1062 // The tail is everything right after the call, and will be reached once
1063 // when setjmp is called, and later when longjmp returns to the setjmp
1064 BasicBlock
*Tail
= SplitBlock(BB
, CI
->getNextNode());
1065 // Add a phi to the tail, which will be the output of setjmp, which
1066 // indicates if this is the first call or a longjmp back. The phi directly
1067 // uses the right value based on where we arrive from
1068 IRB
.SetInsertPoint(Tail
->getFirstNonPHI());
1069 PHINode
*SetjmpRet
= IRB
.CreatePHI(IRB
.getInt32Ty(), 2, "setjmp.ret");
1071 // setjmp initial call returns 0
1072 SetjmpRet
->addIncoming(IRB
.getInt32(0), BB
);
1073 // The proper output is now this, not the setjmp call itself
1074 CI
->replaceAllUsesWith(SetjmpRet
);
1075 // longjmp returns to the setjmp will add themselves to this phi
1076 SetjmpRetPHIs
.push_back(SetjmpRet
);
1079 // Our index in the function is our place in the array + 1 to avoid index
1080 // 0, because index 0 means the longjmp is not ours to handle.
1081 IRB
.SetInsertPoint(CI
);
1082 Value
*Args
[] = {CI
->getArgOperand(0), IRB
.getInt32(SetjmpRetPHIs
.size()),
1083 SetjmpTable
, SetjmpTableSize
};
1084 Instruction
*NewSetjmpTable
=
1085 IRB
.CreateCall(SaveSetjmpF
, Args
, "setjmpTable");
1086 Instruction
*NewSetjmpTableSize
=
1087 IRB
.CreateCall(GetTempRet0F
, None
, "setjmpTableSize");
1088 SetjmpTableInsts
.push_back(NewSetjmpTable
);
1089 SetjmpTableSizeInsts
.push_back(NewSetjmpTableSize
);
1090 ToErase
.push_back(CI
);
1093 // Update each call that can longjmp so it can return to a setjmp where
1096 // Because we are creating new BBs while processing and don't want to make
1097 // all these newly created BBs candidates again for longjmp processing, we
1098 // first make the vector of candidate BBs.
1099 std::vector
<BasicBlock
*> BBs
;
1100 for (BasicBlock
&BB
: F
)
1103 // BBs.size() will change within the loop, so we query it every time
1104 for (unsigned I
= 0; I
< BBs
.size(); I
++) {
1105 BasicBlock
*BB
= BBs
[I
];
1106 for (Instruction
&I
: *BB
) {
1107 if (isa
<InvokeInst
>(&I
))
1108 report_fatal_error("When using Wasm EH with Emscripten SjLj, there is "
1109 "a restriction that `setjmp` function call and "
1110 "exception cannot be used within the same function");
1111 auto *CI
= dyn_cast
<CallInst
>(&I
);
1115 const Value
*Callee
= CI
->getCalledOperand();
1116 if (!canLongjmp(Callee
))
1118 if (isEmAsmCall(Callee
))
1119 report_fatal_error("Cannot use EM_ASM* alongside setjmp/longjmp in " +
1121 ". Please consider using EM_JS, or move the "
1122 "EM_ASM into another function.",
1125 Value
*Threw
= nullptr;
1127 if (Callee
->getName().startswith("__invoke_")) {
1128 // If invoke wrapper has already been generated for this call in
1129 // previous EH phase, search for the load instruction
1130 // %__THREW__.val = __THREW__;
1131 // in postamble after the invoke wrapper call
1132 LoadInst
*ThrewLI
= nullptr;
1133 StoreInst
*ThrewResetSI
= nullptr;
1134 for (auto I
= std::next(BasicBlock::iterator(CI
)), IE
= BB
->end();
1136 if (auto *LI
= dyn_cast
<LoadInst
>(I
))
1137 if (auto *GV
= dyn_cast
<GlobalVariable
>(LI
->getPointerOperand()))
1138 if (GV
== ThrewGV
) {
1139 Threw
= ThrewLI
= LI
;
1143 // Search for the store instruction after the load above
1145 for (auto I
= std::next(BasicBlock::iterator(ThrewLI
)), IE
= BB
->end();
1147 if (auto *SI
= dyn_cast
<StoreInst
>(I
)) {
1148 if (auto *GV
= dyn_cast
<GlobalVariable
>(SI
->getPointerOperand())) {
1149 if (GV
== ThrewGV
&&
1150 SI
->getValueOperand() == getAddrSizeInt(&M
, 0)) {
1157 assert(Threw
&& ThrewLI
&& "Cannot find __THREW__ load after invoke");
1158 assert(ThrewResetSI
&& "Cannot find __THREW__ store after invoke");
1159 Tail
= SplitBlock(BB
, ThrewResetSI
->getNextNode());
1162 // Wrap call with invoke wrapper and generate preamble/postamble
1163 Threw
= wrapInvoke(CI
);
1164 ToErase
.push_back(CI
);
1165 Tail
= SplitBlock(BB
, CI
->getNextNode());
1167 // If exception handling is enabled, the thrown value can be not a
1168 // longjmp but an exception, in which case we shouldn't silently ignore
1169 // exceptions; we should rethrow them.
1170 // __THREW__'s value is 0 when nothing happened, 1 when an exception is
1171 // thrown, other values when longjmp is thrown.
1173 // if (%__THREW__.val == 1)
1178 // eh.rethrow: ;; Rethrow exception
1179 // %exn = call @__cxa_find_matching_catch_2() ;; Retrieve thrown ptr
1180 // __resumeException(%exn)
1183 // <-- Insertion point. Will insert sjlj handling code from here
1188 if (supportsException(&F
) && canThrow(Callee
)) {
1189 IRB
.SetInsertPoint(CI
);
1190 // We will add a new conditional branch. So remove the branch created
1191 // when we split the BB
1192 ToErase
.push_back(BB
->getTerminator());
1193 BasicBlock
*NormalBB
= BasicBlock::Create(C
, "normal", &F
);
1194 BasicBlock
*RethrowBB
= BasicBlock::Create(C
, "eh.rethrow", &F
);
1196 IRB
.CreateICmpEQ(Threw
, getAddrSizeInt(&M
, 1), "cmp.eq.one");
1197 IRB
.CreateCondBr(CmpEqOne
, RethrowBB
, NormalBB
);
1198 IRB
.SetInsertPoint(RethrowBB
);
1199 CallInst
*Exn
= IRB
.CreateCall(getFindMatchingCatch(M
, 0), {}, "exn");
1200 IRB
.CreateCall(ResumeF
, {Exn
});
1201 IRB
.CreateUnreachable();
1202 IRB
.SetInsertPoint(NormalBB
);
1204 BB
= NormalBB
; // New insertion point to insert testSetjmp()
1208 // We need to replace the terminator in Tail - SplitBlock makes BB go
1209 // straight to Tail, we need to check if a longjmp occurred, and go to the
1210 // right setjmp-tail if so
1211 ToErase
.push_back(BB
->getTerminator());
1213 // Generate a function call to testSetjmp function and preamble/postamble
1214 // code to figure out (1) whether longjmp occurred (2) if longjmp
1215 // occurred, which setjmp it corresponds to
1216 Value
*Label
= nullptr;
1217 Value
*LongjmpResult
= nullptr;
1218 BasicBlock
*EndBB
= nullptr;
1219 wrapTestSetjmp(BB
, CI
->getDebugLoc(), Threw
, SetjmpTable
, SetjmpTableSize
,
1220 Label
, LongjmpResult
, EndBB
);
1221 assert(Label
&& LongjmpResult
&& EndBB
);
1223 // Create switch instruction
1224 IRB
.SetInsertPoint(EndBB
);
1225 IRB
.SetCurrentDebugLocation(EndBB
->getInstList().back().getDebugLoc());
1226 SwitchInst
*SI
= IRB
.CreateSwitch(Label
, Tail
, SetjmpRetPHIs
.size());
1227 // -1 means no longjmp happened, continue normally (will hit the default
1228 // switch case). 0 means a longjmp that is not ours to handle, needs a
1229 // rethrow. Otherwise the index is the same as the index in P+1 (to avoid
1231 for (unsigned I
= 0; I
< SetjmpRetPHIs
.size(); I
++) {
1232 SI
->addCase(IRB
.getInt32(I
+ 1), SetjmpRetPHIs
[I
]->getParent());
1233 SetjmpRetPHIs
[I
]->addIncoming(LongjmpResult
, EndBB
);
1236 // We are splitting the block here, and must continue to find other calls
1237 // in the block - which is now split. so continue to traverse in the Tail
1238 BBs
.push_back(Tail
);
1242 // Erase everything we no longer need in this function
1243 for (Instruction
*I
: ToErase
)
1244 I
->eraseFromParent();
1246 // Free setjmpTable buffer before each return instruction + function-exiting
1248 SmallVector
<Instruction
*, 16> ExitingInsts
;
1249 for (BasicBlock
&BB
: F
) {
1250 Instruction
*TI
= BB
.getTerminator();
1251 if (isa
<ReturnInst
>(TI
))
1252 ExitingInsts
.push_back(TI
);
1253 for (auto &I
: BB
) {
1254 if (auto *CB
= dyn_cast
<CallBase
>(&I
)) {
1255 StringRef CalleeName
= CB
->getCalledOperand()->getName();
1256 if (CalleeName
== "__resumeException" ||
1257 CalleeName
== "emscripten_longjmp" || CalleeName
== "__cxa_throw")
1258 ExitingInsts
.push_back(&I
);
1262 for (auto *I
: ExitingInsts
) {
1263 DebugLoc DL
= getOrCreateDebugLoc(I
, F
.getSubprogram());
1264 auto *Free
= CallInst::CreateFree(SetjmpTable
, I
);
1265 Free
->setDebugLoc(DL
);
1266 // CallInst::CreateFree may create a bitcast instruction if its argument
1267 // types mismatch. We need to set the debug loc for the bitcast too.
1268 if (auto *FreeCallI
= dyn_cast
<CallInst
>(Free
)) {
1269 if (auto *BitCastI
= dyn_cast
<BitCastInst
>(FreeCallI
->getArgOperand(0)))
1270 BitCastI
->setDebugLoc(DL
);
1274 // Every call to saveSetjmp can change setjmpTable and setjmpTableSize
1275 // (when buffer reallocation occurs)
1277 // setjmpTableSize = 4;
1278 // setjmpTable = (int *) malloc(40);
1279 // setjmpTable[0] = 0;
1282 // setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize);
1283 // setjmpTableSize = getTempRet0();
1284 // So we need to make sure the SSA for these variables is valid so that every
1285 // saveSetjmp and testSetjmp calls have the correct arguments.
1286 SSAUpdater SetjmpTableSSA
;
1287 SSAUpdater SetjmpTableSizeSSA
;
1288 SetjmpTableSSA
.Initialize(Type::getInt32PtrTy(C
), "setjmpTable");
1289 SetjmpTableSizeSSA
.Initialize(Type::getInt32Ty(C
), "setjmpTableSize");
1290 for (Instruction
*I
: SetjmpTableInsts
)
1291 SetjmpTableSSA
.AddAvailableValue(I
->getParent(), I
);
1292 for (Instruction
*I
: SetjmpTableSizeInsts
)
1293 SetjmpTableSizeSSA
.AddAvailableValue(I
->getParent(), I
);
1295 for (auto UI
= SetjmpTable
->use_begin(), UE
= SetjmpTable
->use_end();
1297 // Grab the use before incrementing the iterator.
1299 // Increment the iterator before removing the use from the list.
1301 if (auto *I
= dyn_cast
<Instruction
>(U
.getUser()))
1302 if (I
->getParent() != Entry
)
1303 SetjmpTableSSA
.RewriteUse(U
);
1305 for (auto UI
= SetjmpTableSize
->use_begin(), UE
= SetjmpTableSize
->use_end();
1309 if (auto *I
= dyn_cast
<Instruction
>(U
.getUser()))
1310 if (I
->getParent() != Entry
)
1311 SetjmpTableSizeSSA
.RewriteUse(U
);
1314 // Finally, our modifications to the cfg can break dominance of SSA variables.
1315 // For example, in this code,
1316 // if (x()) { .. setjmp() .. }
1317 // if (y()) { .. longjmp() .. }
1318 // We must split the longjmp block, and it can jump into the block splitted
1319 // from setjmp one. But that means that when we split the setjmp block, it's
1320 // first part no longer dominates its second part - there is a theoretically
1321 // possible control flow path where x() is false, then y() is true and we
1322 // reach the second part of the setjmp block, without ever reaching the first
1323 // part. So, we rebuild SSA form here.