1 ; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-wasm-explicit-locals -verify-machineinstrs | FileCheck %s
3 ; Test the register stackifier pass.
5 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
6 target triple = "wasm32-unknown-unknown"
8 ; No because of pointer aliasing.
11 ; CHECK: return $1{{$}}
12 define i32 @no0(i32* %p, i32* %q) {
13 %t = load i32, i32* %q
18 ; No because of side effects.
21 ; CHECK: return $1{{$}}
22 define i32 @no1(i32* %p, i32* dereferenceable(4) %q) {
23 %t = load volatile i32, i32* %q, !invariant.load !0
24 store volatile i32 0, i32* %p
28 ; Yes because of invariant load and no side effects.
31 ; CHECK: return $pop{{[0-9]+}}{{$}}
32 define i32 @yes0(i32* %p, i32* dereferenceable(4) %q) {
33 %t = load i32, i32* %q, !invariant.load !0
38 ; Yes because of no intervening side effects.
41 ; CHECK: return $pop0{{$}}
42 define i32 @yes1(i32* %q) {
43 %t = load volatile i32, i32* %q
47 ; Yes because undefined behavior can be sunk past a store.
49 ; CHECK-LABEL: sink_trap:
50 ; CHECK: return $pop{{[0-9]+}}{{$}}
51 define i32 @sink_trap(i32 %x, i32 %y, i32* %p) {
53 store volatile i32 0, i32* %p
57 ; Yes because the call is readnone.
59 ; CHECK-LABEL: sink_readnone_call:
60 ; CHECK: return $pop0{{$}}
61 declare i32 @readnone_callee() readnone nounwind
62 define i32 @sink_readnone_call(i32 %x, i32 %y, i32* %p) {
63 %t = call i32 @readnone_callee()
64 store volatile i32 0, i32* %p
68 ; No because the call is readonly and there's an intervening store.
70 ; CHECK-LABEL: no_sink_readonly_call:
71 ; CHECK: return ${{[0-9]+}}{{$}}
72 declare i32 @readonly_callee() readonly nounwind
73 define i32 @no_sink_readonly_call(i32 %x, i32 %y, i32* %p) {
74 %t = call i32 @readonly_callee()
79 ; Don't schedule stack uses into the stack. To reduce register pressure, the
80 ; scheduler might be tempted to move the definition of $2 down. However, this
81 ; would risk getting incorrect liveness if the instructions are later
82 ; rearranged to make the stack contiguous.
84 ; CHECK-LABEL: stack_uses:
85 ; CHECK: .param i32, i32, i32, i32{{$}}
86 ; CHECK-NEXT: .result i32{{$}}
87 ; CHECK-NEXT: block {{$}}
88 ; CHECK-NEXT: i32.const $push[[L13:[0-9]+]]=, 1{{$}}
89 ; CHECK-NEXT: i32.lt_s $push[[L0:[0-9]+]]=, $0, $pop[[L13]]{{$}}
90 ; CHECK-NEXT: i32.const $push[[L1:[0-9]+]]=, 2{{$}}
91 ; CHECK-NEXT: i32.lt_s $push[[L2:[0-9]+]]=, $1, $pop[[L1]]{{$}}
92 ; CHECK-NEXT: i32.xor $push[[L5:[0-9]+]]=, $pop[[L0]], $pop[[L2]]{{$}}
93 ; CHECK-NEXT: i32.const $push[[L12:[0-9]+]]=, 1{{$}}
94 ; CHECK-NEXT: i32.lt_s $push[[L3:[0-9]+]]=, $2, $pop[[L12]]{{$}}
95 ; CHECK-NEXT: i32.const $push[[L11:[0-9]+]]=, 2{{$}}
96 ; CHECK-NEXT: i32.lt_s $push[[L4:[0-9]+]]=, $3, $pop[[L11]]{{$}}
97 ; CHECK-NEXT: i32.xor $push[[L6:[0-9]+]]=, $pop[[L3]], $pop[[L4]]{{$}}
98 ; CHECK-NEXT: i32.xor $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}}
99 ; CHECK-NEXT: i32.const $push10=, 1{{$}}
100 ; CHECK-NEXT: i32.ne $push8=, $pop7, $pop10{{$}}
101 ; CHECK-NEXT: br_if 0, $pop8{{$}}
102 ; CHECK-NEXT: i32.const $push9=, 0{{$}}
103 ; CHECK-NEXT: return $pop9{{$}}
104 ; CHECK-NEXT: .LBB7_2:
105 ; CHECK-NEXT: end_block{{$}}
106 ; CHECK-NEXT: i32.const $push14=, 1{{$}}
107 ; CHECK-NEXT: return $pop14{{$}}
108 define i32 @stack_uses(i32 %x, i32 %y, i32 %z, i32 %w) {
110 %c = icmp sle i32 %x, 0
111 %d = icmp sle i32 %y, 1
112 %e = icmp sle i32 %z, 0
113 %f = icmp sle i32 %w, 1
117 br i1 %i, label %true, label %false
124 ; Test an interesting case where the load has multiple uses and cannot
125 ; be trivially stackified. However, it can be stackified with a tee_local.
127 ; CHECK-LABEL: multiple_uses:
128 ; CHECK: .param i32, i32, i32{{$}}
129 ; CHECK-NEXT: block {{$}}
130 ; CHECK-NEXT: i32.load $push[[NUM0:[0-9]+]]=, 0($2){{$}}
131 ; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $3=, $pop[[NUM0]]{{$}}
132 ; CHECK-NEXT: i32.ge_u $push[[NUM2:[0-9]+]]=, $pop[[NUM1]], $1{{$}}
133 ; CHECK-NEXT: br_if 0, $pop[[NUM2]]{{$}}
134 ; CHECK-NEXT: i32.lt_u $push[[NUM3:[0-9]+]]=, $3, $0{{$}}
135 ; CHECK-NEXT: br_if 0, $pop[[NUM3]]{{$}}
136 ; CHECK-NEXT: i32.store 0($2), $3{{$}}
137 ; CHECK-NEXT: .LBB8_3:
138 ; CHECK-NEXT: end_block{{$}}
139 ; CHECK-NEXT: return{{$}}
140 define void @multiple_uses(i32* %arg0, i32* %arg1, i32* %arg2) nounwind {
145 %tmp7 = load i32, i32* %arg2
146 %tmp8 = inttoptr i32 %tmp7 to i32*
147 %tmp9 = icmp uge i32* %tmp8, %arg1
148 %tmp10 = icmp ult i32* %tmp8, %arg0
149 %tmp11 = or i1 %tmp9, %tmp10
150 br i1 %tmp11, label %back, label %then
153 store i32 %tmp7, i32* %arg2
157 br i1 undef, label %return, label %loop
163 ; Don't stackify stores across other instructions with side effects.
165 ; CHECK: side_effects:
170 declare void @evoke_side_effects()
171 define hidden void @stackify_store_across_side_effects(double* nocapture %d) {
173 store double 2.0, double* %d
174 call void @evoke_side_effects()
175 store double 2.0, double* %d
176 call void @evoke_side_effects()
180 ; Div instructions have side effects and can't be reordered, but this entire
181 ; function should still be able to be stackified because it's already in
184 ; CHECK-LABEL: div_tree:
185 ; CHECK: .param i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32{{$}}
186 ; CHECK-NEXT: .result i32{{$}}
187 ; CHECK-NEXT: i32.div_s $push[[L0:[0-9]+]]=, $0, $1{{$}}
188 ; CHECK-NEXT: i32.div_s $push[[L1:[0-9]+]]=, $2, $3{{$}}
189 ; CHECK-NEXT: i32.div_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
190 ; CHECK-NEXT: i32.div_s $push[[L3:[0-9]+]]=, $4, $5{{$}}
191 ; CHECK-NEXT: i32.div_s $push[[L4:[0-9]+]]=, $6, $7{{$}}
192 ; CHECK-NEXT: i32.div_s $push[[L5:[0-9]+]]=, $pop[[L3]], $pop[[L4]]{{$}}
193 ; CHECK-NEXT: i32.div_s $push[[L6:[0-9]+]]=, $pop[[L2]], $pop[[L5]]{{$}}
194 ; CHECK-NEXT: i32.div_s $push[[L7:[0-9]+]]=, $8, $9{{$}}
195 ; CHECK-NEXT: i32.div_s $push[[L8:[0-9]+]]=, $10, $11{{$}}
196 ; CHECK-NEXT: i32.div_s $push[[L9:[0-9]+]]=, $pop[[L7]], $pop[[L8]]{{$}}
197 ; CHECK-NEXT: i32.div_s $push[[L10:[0-9]+]]=, $12, $13{{$}}
198 ; CHECK-NEXT: i32.div_s $push[[L11:[0-9]+]]=, $14, $15{{$}}
199 ; CHECK-NEXT: i32.div_s $push[[L12:[0-9]+]]=, $pop[[L10]], $pop[[L11]]{{$}}
200 ; CHECK-NEXT: i32.div_s $push[[L13:[0-9]+]]=, $pop[[L9]], $pop[[L12]]{{$}}
201 ; CHECK-NEXT: i32.div_s $push[[L14:[0-9]+]]=, $pop[[L6]], $pop[[L13]]{{$}}
202 ; CHECK-NEXT: return $pop[[L14]]{{$}}
203 define i32 @div_tree(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) {
205 %div = sdiv i32 %a, %b
206 %div1 = sdiv i32 %c, %d
207 %div2 = sdiv i32 %div, %div1
208 %div3 = sdiv i32 %e, %f
209 %div4 = sdiv i32 %g, %h
210 %div5 = sdiv i32 %div3, %div4
211 %div6 = sdiv i32 %div2, %div5
212 %div7 = sdiv i32 %i, %j
213 %div8 = sdiv i32 %k, %l
214 %div9 = sdiv i32 %div7, %div8
215 %div10 = sdiv i32 %m, %n
216 %div11 = sdiv i32 %o, %p
217 %div12 = sdiv i32 %div10, %div11
218 %div13 = sdiv i32 %div9, %div12
219 %div14 = sdiv i32 %div6, %div13
223 ; A simple multiple-use case.
225 ; CHECK-LABEL: simple_multiple_use:
226 ; CHECK: .param i32, i32{{$}}
227 ; CHECK-NEXT: i32.mul $push[[NUM0:[0-9]+]]=, $1, $0{{$}}
228 ; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
229 ; CHECK-NEXT: call use_a@FUNCTION, $pop[[NUM1]]{{$}}
230 ; CHECK-NEXT: call use_b@FUNCTION, $[[NUM2]]{{$}}
231 ; CHECK-NEXT: return{{$}}
232 declare void @use_a(i32)
233 declare void @use_b(i32)
234 define void @simple_multiple_use(i32 %x, i32 %y) {
235 %mul = mul i32 %y, %x
236 call void @use_a(i32 %mul)
237 call void @use_b(i32 %mul)
241 ; Multiple uses of the same value in one instruction.
243 ; CHECK-LABEL: multiple_uses_in_same_insn:
244 ; CHECK: .param i32, i32{{$}}
245 ; CHECK-NEXT: i32.mul $push[[NUM0:[0-9]+]]=, $1, $0{{$}}
246 ; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
247 ; CHECK-NEXT: call use_2@FUNCTION, $pop[[NUM1]], $[[NUM2]]{{$}}
248 ; CHECK-NEXT: return{{$}}
249 declare void @use_2(i32, i32)
250 define void @multiple_uses_in_same_insn(i32 %x, i32 %y) {
251 %mul = mul i32 %y, %x
252 call void @use_2(i32 %mul, i32 %mul)
256 ; Commute operands to achieve better stackifying.
258 ; CHECK-LABEL: commute:
260 ; CHECK: .result i32{{$}}
261 ; CHECK-NEXT: i32.call $push0=, red@FUNCTION{{$}}
262 ; CHECK-NEXT: i32.call $push1=, green@FUNCTION{{$}}
263 ; CHECK-NEXT: i32.add $push2=, $pop0, $pop1{{$}}
264 ; CHECK-NEXT: i32.call $push3=, blue@FUNCTION{{$}}
265 ; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
266 ; CHECK-NEXT: return $pop4{{$}}
270 define i32 @commute() {
271 %call = call i32 @red()
272 %call1 = call i32 @green()
273 %add = add i32 %call1, %call
274 %call2 = call i32 @blue()
275 %add3 = add i32 %add, %call2
279 ; Don't stackify a register when it would move the def of the register past
280 ; an implicit get_local for the register.
282 ; CHECK-LABEL: no_stackify_past_use:
283 ; CHECK: i32.call $1=, callee@FUNCTION, $0
284 ; CHECK-NEXT: i32.const $push0=, 1
285 ; CHECK-NEXT: i32.add $push1=, $0, $pop0
286 ; CHECK-NEXT: i32.call $push2=, callee@FUNCTION, $pop1
287 ; CHECK-NEXT: i32.sub $push3=, $pop2, $1
288 ; CHECK-NEXT: i32.div_s $push4=, $pop3, $1
289 ; CHECK-NEXT: return $pop4
290 declare i32 @callee(i32)
291 define i32 @no_stackify_past_use(i32 %arg) {
292 %tmp1 = call i32 @callee(i32 %arg)
293 %tmp2 = add i32 %arg, 1
294 %tmp3 = call i32 @callee(i32 %tmp2)
295 %tmp5 = sub i32 %tmp3, %tmp1
296 %tmp6 = sdiv i32 %tmp5, %tmp1
300 ; This is the same as no_stackify_past_use, except using a commutative operator,
301 ; so we can reorder the operands and stackify.
303 ; CHECK-LABEL: commute_to_fix_ordering:
304 ; CHECK: i32.call $push[[L0:.+]]=, callee@FUNCTION, $0
305 ; CHECK: tee_local $push[[L1:.+]]=, $1=, $pop[[L0]]
306 ; CHECK: i32.const $push0=, 1
307 ; CHECK: i32.add $push1=, $0, $pop0
308 ; CHECK: i32.call $push2=, callee@FUNCTION, $pop1
309 ; CHECK: i32.add $push3=, $1, $pop2
310 ; CHECK: i32.mul $push4=, $pop[[L1]], $pop3
311 ; CHECK: return $pop4
312 define i32 @commute_to_fix_ordering(i32 %arg) {
313 %tmp1 = call i32 @callee(i32 %arg)
314 %tmp2 = add i32 %arg, 1
315 %tmp3 = call i32 @callee(i32 %tmp2)
316 %tmp5 = add i32 %tmp3, %tmp1
317 %tmp6 = mul i32 %tmp5, %tmp1
321 ; Stackify individual defs of virtual registers with multiple defs.
323 ; CHECK-LABEL: multiple_defs:
324 ; CHECK: f64.add $push[[NUM0:[0-9]+]]=, ${{[0-9]+}}, $pop{{[0-9]+}}{{$}}
325 ; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
326 ; CHECK-NEXT: f64.select $push{{[0-9]+}}=, $pop{{[0-9]+}}, $pop[[NUM1]], ${{[0-9]+}}{{$}}
328 define void @multiple_defs(i32 %arg, i32 %arg1, i1 %arg2, i1 %arg3, i1 %arg4) {
332 bb5: ; preds = %bb21, %bb
333 %tmp = phi double [ 0.000000e+00, %bb ], [ %tmp22, %bb21 ]
334 %tmp6 = phi double [ 0.000000e+00, %bb ], [ %tmp23, %bb21 ]
335 %tmp7 = fcmp olt double %tmp6, 2.323450e+01
336 br i1 %tmp7, label %bb8, label %bb21
338 bb8: ; preds = %bb17, %bb5
339 %tmp9 = phi double [ %tmp19, %bb17 ], [ %tmp, %bb5 ]
340 %tmp10 = fadd double %tmp6, -1.000000e+00
341 %tmp11 = select i1 %arg2, double -1.135357e+04, double %tmp10
342 %tmp12 = fadd double %tmp11, %tmp9
343 br i1 %arg3, label %bb17, label %bb13
346 %tmp14 = or i32 %arg1, 2
347 %tmp15 = icmp eq i32 %tmp14, 14
348 %tmp16 = select i1 %tmp15, double -1.135357e+04, double 0xBFCE147AE147B000
351 bb17: ; preds = %bb13, %bb8
352 %tmp18 = phi double [ %tmp16, %bb13 ], [ %tmp10, %bb8 ]
353 %tmp19 = fadd double %tmp18, %tmp12
354 %tmp20 = fcmp olt double %tmp6, 2.323450e+01
355 br i1 %tmp20, label %bb8, label %bb21
357 bb21: ; preds = %bb17, %bb5
358 %tmp22 = phi double [ %tmp, %bb5 ], [ %tmp9, %bb17 ]
359 %tmp23 = fadd double %tmp6, 1.000000e+00
360 br i1 %arg4, label %exit, label %bb5
365 ; Don't move calls past loads
366 ; CHECK-LABEL: no_stackify_call_past_load:
367 ; CHECK: i32.call $0=, red
368 ; CHECK: i32.const $push0=, 0
369 ; CHECK: i32.load $1=, count($pop0)
370 @count = hidden global i32 0, align 4
371 define i32 @no_stackify_call_past_load() {
373 %b = load i32, i32* @count, align 4
374 call i32 @callee(i32 %a)
379 ; Don't move stores past loads if there may be aliasing
380 ; CHECK-LABEL: no_stackify_store_past_load
381 ; CHECK: i32.store 0($1), $0
382 ; CHECK: i32.load {{.*}}, 0($2)
383 ; CHECK: i32.call {{.*}}, callee@FUNCTION, $0{{$}}
384 define i32 @no_stackify_store_past_load(i32 %a, i32* %p1, i32* %p2) {
385 store i32 %a, i32* %p1
386 %b = load i32, i32* %p2, align 4
387 call i32 @callee(i32 %a)
391 ; Can still stackify past invariant loads.
392 ; CHECK-LABEL: store_past_invar_load
393 ; CHECK: i32.store 0($1), $0
394 ; CHECK: i32.call {{.*}}, callee@FUNCTION, $0
395 ; CHECK: i32.load $push{{.*}}, 0($2)
397 define i32 @store_past_invar_load(i32 %a, i32* %p1, i32* dereferenceable(4) %p2) {
398 store i32 %a, i32* %p1
399 %b = load i32, i32* %p2, !invariant.load !0
400 call i32 @callee(i32 %a)
404 ; CHECK-LABEL: ignore_dbg_value:
405 ; CHECK-NEXT: .Lfunc_begin
406 ; CHECK-NEXT: unreachable
407 declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
408 define void @ignore_dbg_value() {
409 call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !7, metadata !9), !dbg !10
413 ; Don't stackify an expression that might use the stack into a return, since we
414 ; might insert an epilogue before the return.
416 ; CHECK-LABEL: no_stackify_past_epilogue:
417 ; CHECK: return ${{[0-9]+}}{{$}}
418 declare i32 @use_memory(i32*)
419 define i32 @no_stackify_past_epilogue() {
421 %call = call i32 @use_memory(i32* %x)
425 ; Stackify a loop induction variable into a loop comparison.
427 ; CHECK-LABEL: stackify_indvar:
428 ; CHECK: i32.const $push[[L5:.+]]=, 1{{$}}
429 ; CHECK-NEXT: i32.add $push[[L4:.+]]=, $[[R0:.+]], $pop[[L5]]{{$}}
430 ; CHECK-NEXT: tee_local $push[[L3:.+]]=, $[[R0]]=, $pop[[L4]]{{$}}
431 ; CHECK-NEXT: i32.ne $push[[L2:.+]]=, $0, $pop[[L3]]{{$}}
432 define void @stackify_indvar(i32 %tmp, i32* %v) #0 {
436 bb3: ; preds = %bb3, %bb2
437 %tmp4 = phi i32 [ %tmp7, %bb3 ], [ 0, %bb ]
438 %tmp5 = load volatile i32, i32* %v, align 4
439 %tmp6 = add nsw i32 %tmp5, %tmp4
440 store volatile i32 %tmp6, i32* %v, align 4
441 %tmp7 = add nuw nsw i32 %tmp4, 1
442 %tmp8 = icmp eq i32 %tmp7, %tmp
443 br i1 %tmp8, label %bb10, label %bb3
445 bb10: ; preds = %bb9, %bb
449 ; Don't stackify a call past a __stack_pointer store.
451 ; CHECK-LABEL: stackpointer_dependency:
452 ; CHECK: call {{.+}}, stackpointer_callee@FUNCTION,
453 ; CHECK-NEXT: set_global __stack_pointer@GLOBAL,
454 declare i32 @stackpointer_callee(i8* readnone, i8* readnone)
455 declare i8* @llvm.frameaddress(i32)
456 define i32 @stackpointer_dependency(i8* readnone) {
457 %2 = tail call i8* @llvm.frameaddress(i32 0)
458 %3 = tail call i32 @stackpointer_callee(i8* %0, i8* %2)
462 ; Stackify a call_indirect with respect to its ordering
464 ; CHECK-LABEL: call_indirect_stackify:
465 ; CHECK: i32.load $push[[L4:.+]]=, 0($0)
466 ; CHECK-NEXT: tee_local $push[[L3:.+]]=, $0=, $pop[[L4]]
467 ; CHECK-NEXT: i32.load $push[[L0:.+]]=, 0($0)
468 ; CHECK-NEXT: i32.load $push[[L1:.+]]=, 0($pop[[L0]])
469 ; CHECK-NEXT: i32.call_indirect $push{{.+}}=, $pop[[L3]], $1, $pop[[L1]]
470 %class.call_indirect = type { i32 (...)** }
471 define i32 @call_indirect_stackify(%class.call_indirect** %objptr, i32 %arg) {
472 %obj = load %class.call_indirect*, %class.call_indirect** %objptr
473 %addr = bitcast %class.call_indirect* %obj to i32(%class.call_indirect*, i32)***
474 %vtable = load i32(%class.call_indirect*, i32)**, i32(%class.call_indirect*, i32)*** %addr
475 %vfn = getelementptr inbounds i32(%class.call_indirect*, i32)*, i32(%class.call_indirect*, i32)** %vtable, i32 0
476 %f = load i32(%class.call_indirect*, i32)*, i32(%class.call_indirect*, i32)** %vfn
477 %ret = call i32 %f(%class.call_indirect* %obj, i32 %arg)
481 !llvm.module.flags = !{!0}
484 !0 = !{i32 2, !"Debug Info Version", i32 3}
485 !1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang version 3.9.0 (trunk 266005) (llvm/trunk 266105)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3)
486 !2 = !DIFile(filename: "test.c", directory: "/")
488 !5 = distinct !DISubprogram(name: "test", scope: !2, file: !2, line: 10, type: !6, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: true, unit: !1, retainedNodes: !3)
489 !6 = !DISubroutineType(types: !3)
490 !7 = !DILocalVariable(name: "nzcnt", scope: !5, file: !2, line: 15, type: !8)
491 !8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
493 !10 = !DILocation(line: 15, column: 6, scope: !5)