1 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+bulk-memory | FileCheck %s --check-prefixes CHECK,BULK-MEM
2 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-bulk-memory | FileCheck %s --check-prefixes CHECK,NO-BULK-MEM
4 ; Test that basic bulk memory codegen works correctly
; Build for the 64-bit WebAssembly target; pointer arguments show up as i64 in
; the expected .functype lines of the tests in this file.
6 target triple = "wasm64-unknown-unknown"
; Intrinsic declarations exercised below, grouped by operation: memcpy,
; memmove, then memset. Each group covers an i8 length with i8 pointers, an
; i64 length with i8 pointers, and an i64 length with i32 pointers.
8 declare void @llvm.memcpy.p0i8.p0i8.i8(i8*, i8*, i8, i1)
9 declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
10 declare void @llvm.memcpy.p0i32.p0i32.i64(i32*, i32*, i64, i1)
12 declare void @llvm.memmove.p0i8.p0i8.i8(i8*, i8*, i8, i1)
13 declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i1)
14 declare void @llvm.memmove.p0i32.p0i32.i64(i32*, i32*, i64, i1)
16 declare void @llvm.memset.p0i8.i8(i8*, i8, i8, i1)
17 declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1)
18 declare void @llvm.memset.p0i32.i64(i32*, i8, i64, i1)
; memcpy whose length operand is an 8-bit, zero-extended argument.
; In the +bulk-memory run the expected output widens the length (passed as an
; i32) to i64 and issues a single memory.copy. In the -bulk-memory run the test
; only requires that no memory.copy instruction appears.
20 ; CHECK-LABEL: memcpy_i8:
21 ; NO-BULK-MEM-NOT: memory.copy
22 ; BULK-MEM-NEXT: .functype memcpy_i8 (i64, i64, i32) -> ()
23 ; BULK-MEM-NEXT: i64.extend_i32_u $push0=, $2
24 ; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop0
25 ; BULK-MEM-NEXT: return
26 define void @memcpy_i8(i8* %dest, i8* %src, i8 zeroext %len) {
27 call void @llvm.memcpy.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 0)
; memmove whose length operand is an 8-bit, zero-extended argument.
; The +bulk-memory expectations are the same shape as for memcpy_i8: the length
; is widened to i64 and a single memory.copy is issued. The -bulk-memory run
; only requires that no memory.copy instruction appears.
31 ; CHECK-LABEL: memmove_i8:
32 ; NO-BULK-MEM-NOT: memory.copy
33 ; BULK-MEM-NEXT: .functype memmove_i8 (i64, i64, i32) -> ()
34 ; BULK-MEM-NEXT: i64.extend_i32_u $push0=, $2
35 ; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop0
36 ; BULK-MEM-NEXT: return
37 define void @memmove_i8(i8* %dest, i8* %src, i8 zeroext %len) {
38 call void @llvm.memmove.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 0)
; memset whose length operand is an 8-bit, zero-extended argument.
; In the +bulk-memory run the expected output widens the length to i64 and
; issues a single memory.fill taking the destination, the fill value and the
; widened length. The -bulk-memory run only requires that no memory.fill
; instruction appears.
42 ; CHECK-LABEL: memset_i8:
43 ; NO-BULK-MEM-NOT: memory.fill
44 ; BULK-MEM-NEXT: .functype memset_i8 (i64, i32, i32) -> ()
45 ; BULK-MEM-NEXT: i64.extend_i32_u $push0=, $2
46 ; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop0
47 ; BULK-MEM-NEXT: return
48 define void @memset_i8(i8* %dest, i8 %val, i8 zeroext %len) {
49 call void @llvm.memset.p0i8.i8(i8* %dest, i8 %val, i8 %len, i1 0)
; memcpy between i32 pointers with a 64-bit length argument.
; In the +bulk-memory run the three arguments are expected to feed memory.copy
; directly, with no extension of the length. The -bulk-memory run only requires
; that no memory.copy instruction appears.
53 ; CHECK-LABEL: memcpy_i32:
54 ; NO-BULK-MEM-NOT: memory.copy
55 ; BULK-MEM-NEXT: .functype memcpy_i32 (i64, i64, i64) -> ()
56 ; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
57 ; BULK-MEM-NEXT: return
58 define void @memcpy_i32(i32* %dest, i32* %src, i64 %len) {
59 call void @llvm.memcpy.p0i32.p0i32.i64(i32* %dest, i32* %src, i64 %len, i1 0)
; memmove between i32 pointers with a 64-bit length argument.
; In the +bulk-memory run the three arguments are expected to feed memory.copy
; directly, with no extension of the length. The -bulk-memory run only requires
; that no memory.copy instruction appears.
63 ; CHECK-LABEL: memmove_i32:
64 ; NO-BULK-MEM-NOT: memory.copy
65 ; BULK-MEM-NEXT: .functype memmove_i32 (i64, i64, i64) -> ()
66 ; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
67 ; BULK-MEM-NEXT: return
68 define void @memmove_i32(i32* %dest, i32* %src, i64 %len) {
69 call void @llvm.memmove.p0i32.p0i32.i64(i32* %dest, i32* %src, i64 %len, i1 0)
; memset through an i32 pointer with a 64-bit length argument.
; In the +bulk-memory run the destination, fill value and length are expected
; to feed memory.fill directly, with no extension of the length. The
; -bulk-memory run only requires that no memory.fill instruction appears.
73 ; CHECK-LABEL: memset_i32:
74 ; NO-BULK-MEM-NOT: memory.fill
75 ; BULK-MEM-NEXT: .functype memset_i32 (i64, i32, i64) -> ()
76 ; BULK-MEM-NEXT: memory.fill 0, $0, $1, $2
77 ; BULK-MEM-NEXT: return
78 define void @memset_i32(i32* %dest, i8 %val, i64 %len) {
79 call void @llvm.memset.p0i32.i64(i32* %dest, i8 %val, i64 %len, i1 0)
; memcpy with a constant length of 1.
; The expectations use the prefix shared by both RUN lines, so with or without
; bulk memory the copy is expected to become one byte load from the source
; followed by one byte store to the destination.
83 ; CHECK-LABEL: memcpy_1:
84 ; CHECK-NEXT: .functype memcpy_1 (i64, i64) -> ()
85 ; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
86 ; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
88 define void @memcpy_1(i8* %dest, i8* %src) {
89 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1, i1 0)
; memmove with a constant length of 1.
; The expectations use the prefix shared by both RUN lines, so with or without
; bulk memory the move is expected to become one byte load from the source
; followed by one byte store to the destination.
93 ; CHECK-LABEL: memmove_1:
94 ; CHECK-NEXT: .functype memmove_1 (i64, i64) -> ()
95 ; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
96 ; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
98 define void @memmove_1(i8* %dest, i8* %src) {
99 call void @llvm.memmove.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1, i1 0)
; memset with a constant length of 1.
; Like memcpy_1 and memmove_1, a single byte is expected to be written with a
; plain i32.store8 whether or not bulk memory is enabled, so the full sequence
; is verified under the prefix shared by both RUN lines. The exact-sequence
; match from the label through the return also leaves no room for a
; memory.fill, which makes a separate negative check unnecessary.
103 ; CHECK-LABEL: memset_1:
105 ; CHECK-NEXT: .functype memset_1 (i64, i32) -> ()
106 ; CHECK-NEXT: i32.store8 0($0), $1
107 ; CHECK-NEXT: return
108 define void @memset_1(i8* %dest, i8 %val) {
109 call void @llvm.memset.p0i8.i64(i8* %dest, i8 %val, i64 1, i1 0)
; memcpy with a constant length of 1024.
; In the +bulk-memory run the length is expected to be materialised as an i64
; constant and passed to a single memory.copy. The -bulk-memory run only
; requires that no memory.copy instruction appears.
113 ; CHECK-LABEL: memcpy_1024:
114 ; NO-BULK-MEM-NOT: memory.copy
115 ; BULK-MEM-NEXT: .functype memcpy_1024 (i64, i64) -> ()
116 ; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024
117 ; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
118 ; BULK-MEM-NEXT: return
119 define void @memcpy_1024(i8* %dest, i8* %src) {
120 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1024, i1 0)
; memmove with a constant length of 1024.
; In the +bulk-memory run the length is expected to be materialised as an i64
; constant and passed to a single memory.copy. The -bulk-memory run only
; requires that no memory.copy instruction appears.
124 ; CHECK-LABEL: memmove_1024:
125 ; NO-BULK-MEM-NOT: memory.copy
126 ; BULK-MEM-NEXT: .functype memmove_1024 (i64, i64) -> ()
127 ; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024
128 ; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
129 ; BULK-MEM-NEXT: return
130 define void @memmove_1024(i8* %dest, i8* %src) {
131 call void @llvm.memmove.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1024, i1 0)
; memset with a constant length of 1024.
; In the +bulk-memory run the length is expected to be materialised as an i64
; constant and passed, with the destination and fill value, to a single
; memory.fill. The -bulk-memory run only requires that no memory.fill
; instruction appears.
135 ; CHECK-LABEL: memset_1024:
136 ; NO-BULK-MEM-NOT: memory.fill
137 ; BULK-MEM-NEXT: .functype memset_1024 (i64, i32) -> ()
138 ; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024
139 ; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L0]]
140 ; BULK-MEM-NEXT: return
141 define void @memset_1024(i8* %dest, i8 %val) {
142 call void @llvm.memset.p0i8.i64(i8* %dest, i8 %val, i64 1024, i1 0)
146 ; The following tests check that frame index elimination works for
147 ; bulk memory instructions. The stack pointer is bumped by 112 instead
148 ; of 100 because the stack pointer in WebAssembly is currently always
149 ; 16-byte aligned, even in leaf functions, although it is not written
150 ; back to the global in this case.
152 ; TODO: Change TransientStackAlignment to 1 to avoid this extra
153 ; arithmetic. This will require forcing the use of StackAlignment in
154 ; PrologEpilogEmitter.cpp when
155 ; WebAssemblyFrameLowering::needsSPWriteback would be true.
; memcpy of 100 bytes whose source is a 100-byte stack allocation.
; In the +bulk-memory run the source address is expected to be computed inline
; as __stack_pointer - 112 + 12 and used directly as the memory.copy source
; operand, with the incoming pointer as the destination. The -bulk-memory run
; only requires that no memory.copy instruction appears.
157 ; CHECK-LABEL: memcpy_alloca_src:
158 ; NO-BULK-MEM-NOT: memory.copy
159 ; BULK-MEM-NEXT: .functype memcpy_alloca_src (i64) -> ()
160 ; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
161 ; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 112
162 ; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
163 ; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
164 ; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
165 ; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
166 ; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]]
167 ; BULK-MEM-NEXT: return
168 define void @memcpy_alloca_src(i8* %dst) {
169 %a = alloca [100 x i8]
170 %p = bitcast [100 x i8]* %a to i8*
171 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 100, i1 false)
; memcpy of 100 bytes whose destination is a 100-byte stack allocation.
; In the +bulk-memory run the destination address is expected to be computed
; inline as __stack_pointer - 112 + 12 and used directly as the memory.copy
; destination operand, with the incoming pointer as the source. The
; -bulk-memory run only requires that no memory.copy instruction appears.
175 ; CHECK-LABEL: memcpy_alloca_dst:
176 ; NO-BULK-MEM-NOT: memory.copy
177 ; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i64) -> ()
178 ; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
179 ; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 112
180 ; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
181 ; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
182 ; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
183 ; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
184 ; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]]
185 ; BULK-MEM-NEXT: return
186 define void @memcpy_alloca_dst(i8* %src) {
187 %a = alloca [100 x i8]
188 %p = bitcast [100 x i8]* %a to i8*
189 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %src, i64 100, i1 false)
; memset of 100 bytes into a 100-byte stack allocation.
; In the +bulk-memory run the destination address is expected to be computed
; inline as __stack_pointer - 112 + 12 and used directly as the memory.fill
; destination operand, with the incoming value as the fill byte. The
; -bulk-memory run only requires that no memory.fill instruction appears.
193 ; CHECK-LABEL: memset_alloca:
194 ; NO-BULK-MEM-NOT: memory.fill
195 ; BULK-MEM-NEXT: .functype memset_alloca (i32) -> ()
196 ; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
197 ; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 112
198 ; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
199 ; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
200 ; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
201 ; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
202 ; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]]
203 ; BULK-MEM-NEXT: return
204 define void @memset_alloca(i8 %val) {
205 %a = alloca [100 x i8]
206 %p = bitcast [100 x i8]* %a to i8*
207 call void @llvm.memset.p0i8.i64(i8* %p, i8 %val, i64 100, i1 false)