1 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+bulk-memory | FileCheck %s --check-prefixes CHECK,BULK-MEM
2 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-bulk-memory | FileCheck %s --check-prefixes CHECK,NO-BULK-MEM
4 ; Test that basic bulk memory codegen works correctly
; Every function in this file is compiled twice by the llc invocations at
; the top of the file: once with -mattr=+bulk-memory and once with
; -mattr=-bulk-memory. Each invocation is verified with the common check
; prefix plus one configuration-specific prefix.
;
; Target: 32-bit WebAssembly (wasm32 triple, 32-bit pointers per "p:32:32").
6 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
7 target triple = "wasm32-unknown-unknown"
; memcpy intrinsics: i8 pointers with an i8 length, i8 pointers with an i32
; length, and i32 pointers with an i32 length.
9 declare void @llvm.memcpy.p0i8.p0i8.i8(i8*, i8*, i8, i1)
10 declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
11 declare void @llvm.memcpy.p0i32.p0i32.i32(i32*, i32*, i32, i1)
; memmove intrinsics, in the same three pointer/length combinations.
13 declare void @llvm.memmove.p0i8.p0i8.i8(i8*, i8*, i8, i1)
14 declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i1)
15 declare void @llvm.memmove.p0i32.p0i32.i32(i32*, i32*, i32, i1)
; memset intrinsics: the fill value is always an i8; the destination pointer
; and length types vary as above.
17 declare void @llvm.memset.p0i8.i8(i8*, i8, i8, i1)
18 declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1)
19 declare void @llvm.memset.p0i32.i32(i32*, i8, i32, i1)
; memcpy with a run-time length of type i8 (zero-extended argument).
; With bulk memory enabled the expected body is a single memory.copy that
; uses the three arguments directly, followed by return. With bulk memory
; disabled the only requirement is that no memory.copy is emitted.
21 ; CHECK-LABEL: memcpy_i8:
22 ; NO-BULK-MEM-NOT: memory.copy
23 ; BULK-MEM-NEXT: .functype memcpy_i8 (i32, i32, i32) -> ()
24 ; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
25 ; BULK-MEM-NEXT: return
26 define void @memcpy_i8(i8* %dest, i8* %src, i8 zeroext %len) {
27 call void @llvm.memcpy.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 0)
; memmove with a run-time length of type i8 (zero-extended argument).
; With bulk memory enabled the checks expect the same single memory.copy
; as for memcpy, followed by return. With bulk memory disabled,
; memory.copy must not appear.
31 ; CHECK-LABEL: memmove_i8:
32 ; NO-BULK-MEM-NOT: memory.copy
33 ; BULK-MEM-NEXT: .functype memmove_i8 (i32, i32, i32) -> ()
34 ; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
35 ; BULK-MEM-NEXT: return
36 define void @memmove_i8(i8* %dest, i8* %src, i8 zeroext %len) {
37 call void @llvm.memmove.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 0)
; memset with a run-time length of type i8 (zero-extended argument).
; With bulk memory enabled the expected body is a single memory.fill taking
; destination, value and length straight from the arguments, followed by
; return. With bulk memory disabled, memory.fill must not appear.
41 ; CHECK-LABEL: memset_i8:
42 ; NO-BULK-MEM-NOT: memory.fill
43 ; BULK-MEM-NEXT: .functype memset_i8 (i32, i32, i32) -> ()
44 ; BULK-MEM-NEXT: memory.fill 0, $0, $1, $2
45 ; BULK-MEM-NEXT: return
46 define void @memset_i8(i8* %dest, i8 %val, i8 zeroext %len) {
47 call void @llvm.memset.p0i8.i8(i8* %dest, i8 %val, i8 %len, i1 0)
; memcpy between i32 pointers with a run-time i32 length.
; With bulk memory enabled the expected body is one memory.copy on the raw
; arguments, followed by return. With bulk memory disabled, memory.copy
; must not appear.
51 ; CHECK-LABEL: memcpy_i32:
52 ; NO-BULK-MEM-NOT: memory.copy
53 ; BULK-MEM-NEXT: .functype memcpy_i32 (i32, i32, i32) -> ()
54 ; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
55 ; BULK-MEM-NEXT: return
56 define void @memcpy_i32(i32* %dest, i32* %src, i32 %len) {
57 call void @llvm.memcpy.p0i32.p0i32.i32(i32* %dest, i32* %src, i32 %len, i1 0)
; memmove between i32 pointers with a run-time i32 length.
; With bulk memory enabled the expected body is one memory.copy on the raw
; arguments, followed by return. With bulk memory disabled, memory.copy
; must not appear.
61 ; CHECK-LABEL: memmove_i32:
62 ; NO-BULK-MEM-NOT: memory.copy
63 ; BULK-MEM-NEXT: .functype memmove_i32 (i32, i32, i32) -> ()
64 ; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
65 ; BULK-MEM-NEXT: return
66 define void @memmove_i32(i32* %dest, i32* %src, i32 %len) {
67 call void @llvm.memmove.p0i32.p0i32.i32(i32* %dest, i32* %src, i32 %len, i1 0)
; memset through an i32 pointer with an i8 fill value and a run-time i32
; length. With bulk memory enabled the expected body is one memory.fill on
; the raw arguments, followed by return. With bulk memory disabled,
; memory.fill must not appear.
71 ; CHECK-LABEL: memset_i32:
72 ; NO-BULK-MEM-NOT: memory.fill
73 ; BULK-MEM-NEXT: .functype memset_i32 (i32, i32, i32) -> ()
74 ; BULK-MEM-NEXT: memory.fill 0, $0, $1, $2
75 ; BULK-MEM-NEXT: return
76 define void @memset_i32(i32* %dest, i8 %val, i32 %len) {
77 call void @llvm.memset.p0i32.i32(i32* %dest, i8 %val, i32 %len, i1 0)
; memcpy with a constant length of 1 byte.
; These checks use the common prefix, so they apply to both configurations:
; the copy is expected to become one byte load from the source followed by
; one byte store to the destination, not a bulk-memory instruction.
81 ; CHECK-LABEL: memcpy_1:
82 ; CHECK-NEXT: .functype memcpy_1 (i32, i32) -> ()
83 ; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
84 ; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
86 define void @memcpy_1(i8* %dest, i8* %src) {
87 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 0)
; memmove with a constant length of 1 byte.
; These checks use the common prefix, so they apply to both configurations:
; the move is expected to become one byte load from the source followed by
; one byte store to the destination, not a bulk-memory instruction.
91 ; CHECK-LABEL: memmove_1:
92 ; CHECK-NEXT: .functype memmove_1 (i32, i32) -> ()
93 ; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
94 ; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
96 define void @memmove_1(i8* %dest, i8* %src) {
97 call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 0)
; memset with a constant length of 1 byte.
; Even with bulk memory enabled, the expected body is a plain byte store of
; the value argument to the destination, followed by return. With bulk
; memory disabled, the only requirement is that memory.fill does not appear.
101 ; CHECK-LABEL: memset_1:
102 ; NO-BULK-MEM-NOT: memory.fill
103 ; BULK-MEM-NEXT: .functype memset_1 (i32, i32) -> ()
104 ; BULK-MEM-NEXT: i32.store8 0($0), $1
105 ; BULK-MEM-NEXT: return
106 define void @memset_1(i8* %dest, i8 %val) {
107 call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 1, i1 0)
; memcpy with a constant length of 1024 bytes.
; With bulk memory enabled the length is expected to be materialized with
; i32.const and passed as the size operand of a single memory.copy,
; followed by return. With bulk memory disabled, memory.copy must not
; appear.
111 ; CHECK-LABEL: memcpy_1024:
112 ; NO-BULK-MEM-NOT: memory.copy
113 ; BULK-MEM-NEXT: .functype memcpy_1024 (i32, i32) -> ()
114 ; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
115 ; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
116 ; BULK-MEM-NEXT: return
117 define void @memcpy_1024(i8* %dest, i8* %src) {
118 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1024, i1 0)
; memmove with a constant length of 1024 bytes.
; With bulk memory enabled the length is expected to be materialized with
; i32.const and passed as the size operand of a single memory.copy,
; followed by return. With bulk memory disabled, memory.copy must not
; appear.
122 ; CHECK-LABEL: memmove_1024:
123 ; NO-BULK-MEM-NOT: memory.copy
124 ; BULK-MEM-NEXT: .functype memmove_1024 (i32, i32) -> ()
125 ; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
126 ; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
127 ; BULK-MEM-NEXT: return
128 define void @memmove_1024(i8* %dest, i8* %src) {
129 call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1024, i1 0)
; memset with a constant length of 1024 bytes.
; With bulk memory enabled the length is expected to be materialized with
; i32.const and passed as the size operand of a single memory.fill,
; followed by return. With bulk memory disabled, memory.fill must not
; appear.
133 ; CHECK-LABEL: memset_1024:
134 ; NO-BULK-MEM-NOT: memory.fill
135 ; BULK-MEM-NEXT: .functype memset_1024 (i32, i32) -> ()
136 ; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
137 ; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L0]]
138 ; BULK-MEM-NEXT: return
139 define void @memset_1024(i8* %dest, i8 %val) {
140 call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 1024, i1 0)
144 ; The following tests check that frame index elimination works for
145 ; bulk memory instructions. The stack pointer is bumped by 16 instead
146 ; of 10 because the stack pointer in WebAssembly is currently always
147 ; 16-byte aligned, even in leaf functions, although it is not written
148 ; back to the global in this case.
150 ; TODO: Change TransientStackAlignment to 1 to avoid this extra
151 ; arithmetic. This will require forcing the use of StackAlignment in
152 ; PrologEpilogInserter.cpp when
153 ; WebAssemblyFrameLowering::needsSPWriteback would be true.
; memcpy of 10 bytes whose source is a stack object ([10 x i8] alloca).
; With bulk memory enabled the checks expect the source address to be
; computed inline as (__stack_pointer - 16) + 6 and passed as the second
; address operand of memory.copy, with the constant 10 as the length.
; NOTE(review): the added 6 is presumably 16 - 10, i.e. the offset of the
; 10-byte object inside the 16-byte frame -- confirm.
; With bulk memory disabled, memory.copy must not appear.
155 ; CHECK-LABEL: memcpy_alloca_src:
156 ; NO-BULK-MEM-NOT: memory.copy
157 ; BULK-MEM-NEXT: .functype memcpy_alloca_src (i32) -> ()
158 ; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
159 ; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 16
160 ; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
161 ; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 6
162 ; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
163 ; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 10
164 ; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]]
165 ; BULK-MEM-NEXT: return
166 define void @memcpy_alloca_src(i8* %dst) {
167 %a = alloca [10 x i8]
168 %p = bitcast [10 x i8]* %a to i8*
169 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %p, i32 10, i1 false)
; memcpy of 10 bytes whose destination is a stack object ([10 x i8] alloca).
; With bulk memory enabled the checks expect the destination address to be
; computed inline as (__stack_pointer - 16) + 6 and passed as the first
; address operand of memory.copy, with the constant 10 as the length.
; NOTE(review): the added 6 is presumably 16 - 10, i.e. the offset of the
; 10-byte object inside the 16-byte frame -- confirm.
; With bulk memory disabled, memory.copy must not appear.
173 ; CHECK-LABEL: memcpy_alloca_dst:
174 ; NO-BULK-MEM-NOT: memory.copy
175 ; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i32) -> ()
176 ; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
177 ; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 16
178 ; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
179 ; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 6
180 ; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
181 ; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 10
182 ; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]]
183 ; BULK-MEM-NEXT: return
184 define void @memcpy_alloca_dst(i8* %src) {
185 %a = alloca [10 x i8]
186 %p = bitcast [10 x i8]* %a to i8*
187 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %src, i32 10, i1 false)
; memset of 10 bytes whose destination is a stack object ([10 x i8] alloca).
; With bulk memory enabled the checks expect the destination address to be
; computed inline as (__stack_pointer - 16) + 6 and passed as the address
; operand of memory.fill, with the argument as the fill value and the
; constant 10 as the length.
; NOTE(review): the added 6 is presumably 16 - 10, i.e. the offset of the
; 10-byte object inside the 16-byte frame -- confirm.
; With bulk memory disabled, memory.fill must not appear.
191 ; CHECK-LABEL: memset_alloca:
192 ; NO-BULK-MEM-NOT: memory.fill
193 ; BULK-MEM-NEXT: .functype memset_alloca (i32) -> ()
194 ; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
195 ; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 16
196 ; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
197 ; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 6
198 ; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
199 ; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 10
200 ; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]]
201 ; BULK-MEM-NEXT: return
202 define void @memset_alloca(i8 %val) {
203 %a = alloca [10 x i8]
204 %p = bitcast [10 x i8]* %a to i8*
205 call void @llvm.memset.p0i8.i32(i8* %p, i8 %val, i32 10, i1 false)