1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 %s -o - | FileCheck %s
5 ; Testing codegen for memcpy with vector operands for all combinations of the following parameters:
6 ; destination address space: 0, 1, 3, 5
7 ; source address space: 0, 1, 3, 4, 5
8 ; alignment: 1, 2, 8, 16
; Test case: 16-byte llvm.memcpy, addrspace(0) -> addrspace(0), both pointers align 1.
; Expected assembly: one flat_load_dwordx4 feeding one flat_store_dwordx4.
12 define void @memcpy_p0_p0_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
13 ; CHECK-LABEL: memcpy_p0_p0_sz16_align_1_1:
14 ; CHECK: ; %bb.0: ; %entry
15 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16 ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
17 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
18 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
19 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
20 ; CHECK-NEXT: s_setpc_b64 s[30:31]
22 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false)
; Test case: 31-byte llvm.memcpy, addrspace(0) -> addrspace(0), both pointers align 1.
; Expected assembly: four flat loads issued in one s_clause (ubyte at offset 30, ushort at 28,
; dwordx3 at 16, dwordx4 at 0), then the matching flat stores in the same order.
26 define void @memcpy_p0_p0_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
27 ; CHECK-LABEL: memcpy_p0_p0_sz31_align_1_1:
28 ; CHECK: ; %bb.0: ; %entry
29 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30 ; CHECK-NEXT: s_clause 0x3
31 ; CHECK-NEXT: flat_load_ubyte v9, v[2:3] offset:30
32 ; CHECK-NEXT: flat_load_ushort v10, v[2:3] offset:28
33 ; CHECK-NEXT: flat_load_dwordx3 v[6:8], v[2:3] offset:16
34 ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
35 ; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3)
36 ; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30
37 ; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(3)
38 ; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
39 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(3)
40 ; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16
41 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(3)
42 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
43 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
44 ; CHECK-NEXT: s_setpc_b64 s[30:31]
46 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false)
; Test case: 32-byte llvm.memcpy, addrspace(0) -> addrspace(0), both pointers align 1.
; Expected assembly: two flat_load_dwordx4 (offsets 16 and 0) in one s_clause, then the
; two matching flat_store_dwordx4.
50 define void @memcpy_p0_p0_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
51 ; CHECK-LABEL: memcpy_p0_p0_sz32_align_1_1:
52 ; CHECK: ; %bb.0: ; %entry
53 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54 ; CHECK-NEXT: s_clause 0x1
55 ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
56 ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
57 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
58 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
59 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
60 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
61 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
62 ; CHECK-NEXT: s_setpc_b64 s[30:31]
64 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false)
; Test case: 16-byte llvm.memcpy, addrspace(0) -> addrspace(0), both pointers align 2.
; Expected assembly: one flat_load_dwordx4 feeding one flat_store_dwordx4.
68 define void @memcpy_p0_p0_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
69 ; CHECK-LABEL: memcpy_p0_p0_sz16_align_2_2:
70 ; CHECK: ; %bb.0: ; %entry
71 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72 ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
73 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
74 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
75 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
76 ; CHECK-NEXT: s_setpc_b64 s[30:31]
78 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false)
; Test case: 31-byte llvm.memcpy, addrspace(0) -> addrspace(0), both pointers align 2.
; Expected assembly: four flat loads issued in one s_clause (ubyte at offset 30, ushort at 28,
; dwordx3 at 16, dwordx4 at 0), then the matching flat stores in the same order.
82 define void @memcpy_p0_p0_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
83 ; CHECK-LABEL: memcpy_p0_p0_sz31_align_2_2:
84 ; CHECK: ; %bb.0: ; %entry
85 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86 ; CHECK-NEXT: s_clause 0x3
87 ; CHECK-NEXT: flat_load_ubyte v9, v[2:3] offset:30
88 ; CHECK-NEXT: flat_load_ushort v10, v[2:3] offset:28
89 ; CHECK-NEXT: flat_load_dwordx3 v[6:8], v[2:3] offset:16
90 ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
91 ; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3)
92 ; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30
93 ; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(3)
94 ; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
95 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(3)
96 ; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16
97 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(3)
98 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
99 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
100 ; CHECK-NEXT: s_setpc_b64 s[30:31]
102 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false)
; Test case: 32-byte llvm.memcpy, addrspace(0) -> addrspace(0), both pointers align 2.
; Expected assembly: two flat_load_dwordx4 (offsets 16 and 0) in one s_clause, then the
; two matching flat_store_dwordx4.
106 define void @memcpy_p0_p0_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
107 ; CHECK-LABEL: memcpy_p0_p0_sz32_align_2_2:
108 ; CHECK: ; %bb.0: ; %entry
109 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110 ; CHECK-NEXT: s_clause 0x1
111 ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
112 ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
113 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
114 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
115 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
116 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
117 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
118 ; CHECK-NEXT: s_setpc_b64 s[30:31]
120 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false)
; Test case: 16-byte llvm.memcpy, addrspace(0) -> addrspace(0), both pointers align 8.
; Expected assembly: one flat_load_dwordx4 feeding one flat_store_dwordx4.
124 define void @memcpy_p0_p0_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
125 ; CHECK-LABEL: memcpy_p0_p0_sz16_align_8_8:
126 ; CHECK: ; %bb.0: ; %entry
127 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
128 ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
129 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
130 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
131 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
132 ; CHECK-NEXT: s_setpc_b64 s[30:31]
134 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false)
; Test case: 31-byte llvm.memcpy, addrspace(0) -> addrspace(0), both pointers align 8.
; Expected assembly: two flat dwordx4 load/store pairs at offsets 15 and 0. The offset-15
; access is intentional: two 16-byte accesses at 0 and 15 cover exactly bytes 0..30,
; overlapping on byte 15.
138 define void @memcpy_p0_p0_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
139 ; CHECK-LABEL: memcpy_p0_p0_sz31_align_8_8:
140 ; CHECK: ; %bb.0: ; %entry
141 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142 ; CHECK-NEXT: s_clause 0x1
143 ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:15
144 ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
145 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
146 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:15
147 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
148 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
149 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
150 ; CHECK-NEXT: s_setpc_b64 s[30:31]
152 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false)
; Test case: 32-byte llvm.memcpy, addrspace(0) -> addrspace(0), both pointers align 8.
; Expected assembly: two flat_load_dwordx4 (offsets 16 and 0) in one s_clause, then the
; two matching flat_store_dwordx4.
156 define void @memcpy_p0_p0_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
157 ; CHECK-LABEL: memcpy_p0_p0_sz32_align_8_8:
158 ; CHECK: ; %bb.0: ; %entry
159 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
160 ; CHECK-NEXT: s_clause 0x1
161 ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
162 ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
163 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
164 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
165 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
166 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
167 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
168 ; CHECK-NEXT: s_setpc_b64 s[30:31]
170 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false)
; Test case: 16-byte llvm.memcpy, addrspace(0) -> addrspace(0), both pointers align 16.
; Expected assembly: one flat_load_dwordx4 feeding one flat_store_dwordx4.
174 define void @memcpy_p0_p0_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
175 ; CHECK-LABEL: memcpy_p0_p0_sz16_align_16_16:
176 ; CHECK: ; %bb.0: ; %entry
177 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178 ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
179 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
180 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
181 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
182 ; CHECK-NEXT: s_setpc_b64 s[30:31]
184 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false)
; Test case: 31-byte llvm.memcpy, addrspace(0) -> addrspace(0), both pointers align 16.
; Expected assembly: two flat dwordx4 load/store pairs at offsets 15 and 0. The offset-15
; access is intentional: two 16-byte accesses at 0 and 15 cover exactly bytes 0..30,
; overlapping on byte 15.
188 define void @memcpy_p0_p0_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
189 ; CHECK-LABEL: memcpy_p0_p0_sz31_align_16_16:
190 ; CHECK: ; %bb.0: ; %entry
191 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
192 ; CHECK-NEXT: s_clause 0x1
193 ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:15
194 ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
195 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
196 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:15
197 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
198 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
199 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
200 ; CHECK-NEXT: s_setpc_b64 s[30:31]
202 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false)
; Test case: 32-byte llvm.memcpy, addrspace(0) -> addrspace(0), both pointers align 16.
; Expected assembly: two flat_load_dwordx4 (offsets 16 and 0) in one s_clause, then the
; two matching flat_store_dwordx4.
206 define void @memcpy_p0_p0_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
207 ; CHECK-LABEL: memcpy_p0_p0_sz32_align_16_16:
208 ; CHECK: ; %bb.0: ; %entry
209 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210 ; CHECK-NEXT: s_clause 0x1
211 ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
212 ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
213 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
214 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
215 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
216 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
217 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
218 ; CHECK-NEXT: s_setpc_b64 s[30:31]
220 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false)
; Test case: 16-byte llvm.memcpy, addrspace(1) source -> addrspace(0) destination, both align 1.
; Expected assembly: one global_load_dwordx4 feeding one flat_store_dwordx4.
224 define void @memcpy_p0_p1_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
225 ; CHECK-LABEL: memcpy_p0_p1_sz16_align_1_1:
226 ; CHECK: ; %bb.0: ; %entry
227 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
228 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
229 ; CHECK-NEXT: s_waitcnt vmcnt(0)
230 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
231 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
232 ; CHECK-NEXT: s_setpc_b64 s[30:31]
234 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false)
; Test case: 31-byte llvm.memcpy, addrspace(1) source -> addrspace(0) destination, both align 1.
; Expected assembly: four global loads issued in one s_clause (ubyte at offset 30, ushort at 28,
; dwordx3 at 16, dwordx4 at 0), then the matching flat stores in the same order.
238 define void @memcpy_p0_p1_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
239 ; CHECK-LABEL: memcpy_p0_p1_sz31_align_1_1:
240 ; CHECK: ; %bb.0: ; %entry
241 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242 ; CHECK-NEXT: s_clause 0x3
243 ; CHECK-NEXT: global_load_ubyte v9, v[2:3], off offset:30
244 ; CHECK-NEXT: global_load_ushort v10, v[2:3], off offset:28
245 ; CHECK-NEXT: global_load_dwordx3 v[6:8], v[2:3], off offset:16
246 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
247 ; CHECK-NEXT: s_waitcnt vmcnt(3)
248 ; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30
249 ; CHECK-NEXT: s_waitcnt vmcnt(2)
250 ; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
251 ; CHECK-NEXT: s_waitcnt vmcnt(1)
252 ; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16
253 ; CHECK-NEXT: s_waitcnt vmcnt(0)
254 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
255 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
256 ; CHECK-NEXT: s_setpc_b64 s[30:31]
258 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false)
; Test case: 32-byte llvm.memcpy, addrspace(1) source -> addrspace(0) destination, both align 1.
; Expected assembly: two global_load_dwordx4 (offsets 16 and 0) in one s_clause, then the
; two matching flat_store_dwordx4.
262 define void @memcpy_p0_p1_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
263 ; CHECK-LABEL: memcpy_p0_p1_sz32_align_1_1:
264 ; CHECK: ; %bb.0: ; %entry
265 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266 ; CHECK-NEXT: s_clause 0x1
267 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
268 ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
269 ; CHECK-NEXT: s_waitcnt vmcnt(1)
270 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
271 ; CHECK-NEXT: s_waitcnt vmcnt(0)
272 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
273 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
274 ; CHECK-NEXT: s_setpc_b64 s[30:31]
276 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false)
; Test case: 16-byte llvm.memcpy, addrspace(1) source -> addrspace(0) destination, both align 2.
; Expected assembly: one global_load_dwordx4 feeding one flat_store_dwordx4.
280 define void @memcpy_p0_p1_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
281 ; CHECK-LABEL: memcpy_p0_p1_sz16_align_2_2:
282 ; CHECK: ; %bb.0: ; %entry
283 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
284 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
285 ; CHECK-NEXT: s_waitcnt vmcnt(0)
286 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
287 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
288 ; CHECK-NEXT: s_setpc_b64 s[30:31]
290 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false)
; Test case: 31-byte llvm.memcpy, addrspace(1) source -> addrspace(0) destination, both align 2.
; Expected assembly: four global loads issued in one s_clause (ubyte at offset 30, ushort at 28,
; dwordx3 at 16, dwordx4 at 0), then the matching flat stores in the same order.
294 define void @memcpy_p0_p1_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
295 ; CHECK-LABEL: memcpy_p0_p1_sz31_align_2_2:
296 ; CHECK: ; %bb.0: ; %entry
297 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
298 ; CHECK-NEXT: s_clause 0x3
299 ; CHECK-NEXT: global_load_ubyte v9, v[2:3], off offset:30
300 ; CHECK-NEXT: global_load_ushort v10, v[2:3], off offset:28
301 ; CHECK-NEXT: global_load_dwordx3 v[6:8], v[2:3], off offset:16
302 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
303 ; CHECK-NEXT: s_waitcnt vmcnt(3)
304 ; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30
305 ; CHECK-NEXT: s_waitcnt vmcnt(2)
306 ; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
307 ; CHECK-NEXT: s_waitcnt vmcnt(1)
308 ; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16
309 ; CHECK-NEXT: s_waitcnt vmcnt(0)
310 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
311 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
312 ; CHECK-NEXT: s_setpc_b64 s[30:31]
314 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false)
; Test case: 32-byte llvm.memcpy, addrspace(1) source -> addrspace(0) destination, both align 2.
; Expected assembly: two global_load_dwordx4 (offsets 16 and 0) in one s_clause, then the
; two matching flat_store_dwordx4.
318 define void @memcpy_p0_p1_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
319 ; CHECK-LABEL: memcpy_p0_p1_sz32_align_2_2:
320 ; CHECK: ; %bb.0: ; %entry
321 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322 ; CHECK-NEXT: s_clause 0x1
323 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
324 ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
325 ; CHECK-NEXT: s_waitcnt vmcnt(1)
326 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
327 ; CHECK-NEXT: s_waitcnt vmcnt(0)
328 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
329 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
330 ; CHECK-NEXT: s_setpc_b64 s[30:31]
332 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false)
; Test case: 16-byte llvm.memcpy, addrspace(1) source -> addrspace(0) destination, both align 8.
; Expected assembly: one global_load_dwordx4 feeding one flat_store_dwordx4.
336 define void @memcpy_p0_p1_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
337 ; CHECK-LABEL: memcpy_p0_p1_sz16_align_8_8:
338 ; CHECK: ; %bb.0: ; %entry
339 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
340 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
341 ; CHECK-NEXT: s_waitcnt vmcnt(0)
342 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
343 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
344 ; CHECK-NEXT: s_setpc_b64 s[30:31]
346 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false)
; Test case: 31-byte llvm.memcpy, addrspace(1) source -> addrspace(0) destination, both align 8.
; Expected assembly: two global_load_dwordx4 / flat_store_dwordx4 pairs at offsets 15 and 0.
; The offset-15 access is intentional: two 16-byte accesses at 0 and 15 cover exactly
; bytes 0..30, overlapping on byte 15.
350 define void @memcpy_p0_p1_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
351 ; CHECK-LABEL: memcpy_p0_p1_sz31_align_8_8:
352 ; CHECK: ; %bb.0: ; %entry
353 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
354 ; CHECK-NEXT: s_clause 0x1
355 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:15
356 ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
357 ; CHECK-NEXT: s_waitcnt vmcnt(1)
358 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:15
359 ; CHECK-NEXT: s_waitcnt vmcnt(0)
360 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
361 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
362 ; CHECK-NEXT: s_setpc_b64 s[30:31]
364 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false)
; Test case: 32-byte llvm.memcpy, addrspace(1) source -> addrspace(0) destination, both align 8.
; Expected assembly: two global_load_dwordx4 (offsets 16 and 0) in one s_clause, then the
; two matching flat_store_dwordx4.
368 define void @memcpy_p0_p1_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
369 ; CHECK-LABEL: memcpy_p0_p1_sz32_align_8_8:
370 ; CHECK: ; %bb.0: ; %entry
371 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372 ; CHECK-NEXT: s_clause 0x1
373 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
374 ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
375 ; CHECK-NEXT: s_waitcnt vmcnt(1)
376 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
377 ; CHECK-NEXT: s_waitcnt vmcnt(0)
378 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
379 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
380 ; CHECK-NEXT: s_setpc_b64 s[30:31]
382 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false)
; Test case: 16-byte llvm.memcpy, addrspace(1) source -> addrspace(0) destination, both align 16.
; Expected assembly: one global_load_dwordx4 feeding one flat_store_dwordx4.
386 define void @memcpy_p0_p1_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
387 ; CHECK-LABEL: memcpy_p0_p1_sz16_align_16_16:
388 ; CHECK: ; %bb.0: ; %entry
389 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
390 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
391 ; CHECK-NEXT: s_waitcnt vmcnt(0)
392 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
393 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
394 ; CHECK-NEXT: s_setpc_b64 s[30:31]
396 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false)
; Test case: 31-byte llvm.memcpy, addrspace(1) source -> addrspace(0) destination, both align 16.
; Expected assembly: two global_load_dwordx4 / flat_store_dwordx4 pairs at offsets 15 and 0.
; The offset-15 access is intentional: two 16-byte accesses at 0 and 15 cover exactly
; bytes 0..30, overlapping on byte 15.
400 define void @memcpy_p0_p1_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
401 ; CHECK-LABEL: memcpy_p0_p1_sz31_align_16_16:
402 ; CHECK: ; %bb.0: ; %entry
403 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
404 ; CHECK-NEXT: s_clause 0x1
405 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:15
406 ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
407 ; CHECK-NEXT: s_waitcnt vmcnt(1)
408 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:15
409 ; CHECK-NEXT: s_waitcnt vmcnt(0)
410 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
411 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
412 ; CHECK-NEXT: s_setpc_b64 s[30:31]
414 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false)
; Test case: 32-byte llvm.memcpy, addrspace(1) source -> addrspace(0) destination, both align 16.
; Expected assembly: two global_load_dwordx4 (offsets 16 and 0) in one s_clause, then the
; two matching flat_store_dwordx4.
418 define void @memcpy_p0_p1_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
419 ; CHECK-LABEL: memcpy_p0_p1_sz32_align_16_16:
420 ; CHECK: ; %bb.0: ; %entry
421 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
422 ; CHECK-NEXT: s_clause 0x1
423 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
424 ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
425 ; CHECK-NEXT: s_waitcnt vmcnt(1)
426 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
427 ; CHECK-NEXT: s_waitcnt vmcnt(0)
428 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
429 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
430 ; CHECK-NEXT: s_setpc_b64 s[30:31]
432 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false)
; Test case: 16-byte llvm.memcpy, addrspace(3) source -> addrspace(0) destination, both align 1.
; Expected assembly: one ds_read2_b64 (offset1:1) feeding one flat_store_dwordx4.
436 define void @memcpy_p0_p3_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
437 ; CHECK-LABEL: memcpy_p0_p3_sz16_align_1_1:
438 ; CHECK: ; %bb.0: ; %entry
439 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
440 ; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1
441 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
442 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
443 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
444 ; CHECK-NEXT: s_setpc_b64 s[30:31]
446 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false)
; Test case: 31-byte llvm.memcpy, addrspace(3) source -> addrspace(0) destination, both align 1.
; Expected assembly: five ds reads (u8 at offset 30, b32 at 24, u16 at 28, b64 at 16, and a
; ds_read2_b64 for the first 16 bytes), then four flat stores (byte at 30, short at 28,
; dwordx3 at 16, dwordx4 at 0).
450 define void @memcpy_p0_p3_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
451 ; CHECK-LABEL: memcpy_p0_p3_sz31_align_1_1:
452 ; CHECK: ; %bb.0: ; %entry
453 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
454 ; CHECK-NEXT: ds_read_u8 v9, v2 offset:30
455 ; CHECK-NEXT: ds_read_b32 v8, v2 offset:24
456 ; CHECK-NEXT: ds_read_u16 v10, v2 offset:28
457 ; CHECK-NEXT: ds_read_b64 v[6:7], v2 offset:16
458 ; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1
459 ; CHECK-NEXT: s_waitcnt lgkmcnt(4)
460 ; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30
461 ; CHECK-NEXT: s_waitcnt lgkmcnt(3)
462 ; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
463 ; CHECK-NEXT: s_waitcnt lgkmcnt(3)
464 ; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16
465 ; CHECK-NEXT: s_waitcnt lgkmcnt(3)
466 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
467 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
468 ; CHECK-NEXT: s_setpc_b64 s[30:31]
470 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false)
; Test case: 32-byte llvm.memcpy, addrspace(3) source -> addrspace(0) destination, both align 1.
; Expected assembly: two ds_read2_b64 (offset0:2 offset1:3, then offset1:1) feeding two
; flat_store_dwordx4 at offsets 16 and 0.
474 define void @memcpy_p0_p3_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
475 ; CHECK-LABEL: memcpy_p0_p3_sz32_align_1_1:
476 ; CHECK: ; %bb.0: ; %entry
477 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
478 ; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset0:2 offset1:3
479 ; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1
480 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
481 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16
482 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
483 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
484 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
485 ; CHECK-NEXT: s_setpc_b64 s[30:31]
487 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false)
; Test case: 16-byte llvm.memcpy, addrspace(3) source -> addrspace(0) destination, both align 2.
; Expected assembly: one ds_read2_b64 (offset1:1) feeding one flat_store_dwordx4.
491 define void @memcpy_p0_p3_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
492 ; CHECK-LABEL: memcpy_p0_p3_sz16_align_2_2:
493 ; CHECK: ; %bb.0: ; %entry
494 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
495 ; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1
496 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
497 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
498 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
499 ; CHECK-NEXT: s_setpc_b64 s[30:31]
501 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false)
; Test case: 31-byte llvm.memcpy, addrspace(3) source -> addrspace(0) destination, both align 2.
; Expected assembly: five ds reads (u8 at offset 30, b32 at 24, u16 at 28, b64 at 16, and a
; ds_read2_b64 for the first 16 bytes), then four flat stores (byte at 30, short at 28,
; dwordx3 at 16, dwordx4 at 0).
505 define void @memcpy_p0_p3_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
506 ; CHECK-LABEL: memcpy_p0_p3_sz31_align_2_2:
507 ; CHECK: ; %bb.0: ; %entry
508 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
509 ; CHECK-NEXT: ds_read_u8 v9, v2 offset:30
510 ; CHECK-NEXT: ds_read_b32 v8, v2 offset:24
511 ; CHECK-NEXT: ds_read_u16 v10, v2 offset:28
512 ; CHECK-NEXT: ds_read_b64 v[6:7], v2 offset:16
513 ; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1
514 ; CHECK-NEXT: s_waitcnt lgkmcnt(4)
515 ; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30
516 ; CHECK-NEXT: s_waitcnt lgkmcnt(3)
517 ; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
518 ; CHECK-NEXT: s_waitcnt lgkmcnt(3)
519 ; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16
520 ; CHECK-NEXT: s_waitcnt lgkmcnt(3)
521 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
522 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
523 ; CHECK-NEXT: s_setpc_b64 s[30:31]
525 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false)
; Test case: 32-byte llvm.memcpy, addrspace(3) source -> addrspace(0) destination, both align 2.
; Expected assembly: two ds_read2_b64 (offset0:2 offset1:3, then offset1:1) feeding two
; flat_store_dwordx4 at offsets 16 and 0.
529 define void @memcpy_p0_p3_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
530 ; CHECK-LABEL: memcpy_p0_p3_sz32_align_2_2:
531 ; CHECK: ; %bb.0: ; %entry
532 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
533 ; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset0:2 offset1:3
534 ; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1
535 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
536 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16
537 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
538 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
539 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
540 ; CHECK-NEXT: s_setpc_b64 s[30:31]
542 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false)
; Test case: 16-byte llvm.memcpy, addrspace(3) source -> addrspace(0) destination, both align 8.
; Expected assembly: one ds_read2_b64 (offset1:1) feeding one flat_store_dwordx4.
546 define void @memcpy_p0_p3_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
547 ; CHECK-LABEL: memcpy_p0_p3_sz16_align_8_8:
548 ; CHECK: ; %bb.0: ; %entry
549 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
550 ; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1
551 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
552 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
553 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
554 ; CHECK-NEXT: s_setpc_b64 s[30:31]
556 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false)
; Test case: 31-byte llvm.memcpy, addrspace(3) source -> addrspace(0) destination, both align 8.
; Expected assembly: ds_read_b128 at offset 15 plus ds_read2_b64 (offset1:1), feeding two
; flat_store_dwordx4 at offsets 15 and 0. The offset-15 access is intentional: two
; 16-byte accesses at 0 and 15 cover exactly bytes 0..30, overlapping on byte 15.
560 define void @memcpy_p0_p3_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
561 ; CHECK-LABEL: memcpy_p0_p3_sz31_align_8_8:
562 ; CHECK: ; %bb.0: ; %entry
563 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
564 ; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:15
565 ; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1
566 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
567 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:15
568 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
569 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
570 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
571 ; CHECK-NEXT: s_setpc_b64 s[30:31]
573 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false)
; Test case: 32-byte llvm.memcpy, addrspace(3) source -> addrspace(0) destination, both align 8.
; Expected assembly: two ds_read2_b64 (offset0:2 offset1:3, then offset1:1) feeding two
; flat_store_dwordx4 at offsets 16 and 0.
577 define void @memcpy_p0_p3_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
578 ; CHECK-LABEL: memcpy_p0_p3_sz32_align_8_8:
579 ; CHECK: ; %bb.0: ; %entry
580 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
581 ; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset0:2 offset1:3
582 ; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1
583 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
584 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16
585 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
586 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
587 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
588 ; CHECK-NEXT: s_setpc_b64 s[30:31]
590 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false)
; Test case: 16-byte llvm.memcpy, addrspace(3) source -> addrspace(0) destination, both align 16.
; Expected assembly: one ds_read_b128 feeding one flat_store_dwordx4 (the lower-alignment
; variants of this case expect ds_read2_b64 instead).
594 define void @memcpy_p0_p3_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
595 ; CHECK-LABEL: memcpy_p0_p3_sz16_align_16_16:
596 ; CHECK: ; %bb.0: ; %entry
597 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
598 ; CHECK-NEXT: ds_read_b128 v[2:5], v2
599 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
600 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
601 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
602 ; CHECK-NEXT: s_setpc_b64 s[30:31]
604 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false)
; Test case: 31-byte llvm.memcpy, addrspace(3) source -> addrspace(0) destination, both align 16.
; Expected assembly: two ds_read_b128 (offsets 15 and 0) feeding two flat_store_dwordx4 at
; offsets 15 and 0. The offset-15 access is intentional: two 16-byte accesses at 0 and 15
; cover exactly bytes 0..30, overlapping on byte 15.
608 define void @memcpy_p0_p3_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
609 ; CHECK-LABEL: memcpy_p0_p3_sz31_align_16_16:
610 ; CHECK: ; %bb.0: ; %entry
611 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
612 ; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:15
613 ; CHECK-NEXT: ds_read_b128 v[7:10], v2
614 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
615 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:15
616 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
617 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
618 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
619 ; CHECK-NEXT: s_setpc_b64 s[30:31]
621 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false)
; Test case: 32-byte llvm.memcpy, addrspace(3) source -> addrspace(0) destination, both align 16.
; Expected assembly: two ds_read_b128 (offsets 16 and 0) feeding two flat_store_dwordx4 at
; offsets 16 and 0.
625 define void @memcpy_p0_p3_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
626 ; CHECK-LABEL: memcpy_p0_p3_sz32_align_16_16:
627 ; CHECK: ; %bb.0: ; %entry
628 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
629 ; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:16
630 ; CHECK-NEXT: ds_read_b128 v[7:10], v2
631 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
632 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16
633 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
634 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
635 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
636 ; CHECK-NEXT: s_setpc_b64 s[30:31]
638 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false)
; Test case: 16-byte llvm.memcpy, addrspace(4) source -> addrspace(0) destination, both align 1.
; Expected assembly: two sequential global_load_dwordx2 / flat_store_dwordx2 pairs at
; offsets 0 and 8; each store waits for its own load before the next load is issued.
642 define void @memcpy_p0_p4_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
643 ; CHECK-LABEL: memcpy_p0_p4_sz16_align_1_1:
644 ; CHECK: ; %bb.0: ; %entry
645 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
646 ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
647 ; CHECK-NEXT: s_waitcnt vmcnt(0)
648 ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
649 ; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:8
650 ; CHECK-NEXT: s_waitcnt vmcnt(0)
651 ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:8
652 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
653 ; CHECK-NEXT: s_setpc_b64 s[30:31]
655 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
; Test case: 31-byte llvm.memcpy, addrspace(4) source -> addrspace(0) destination, both align 1.
; Expected assembly: six sequential global load / flat store pairs: dwordx2 at offsets 0, 8
; and 16, dword at 24, ushort/short at 28, ubyte/byte at 30. Each store waits for its own
; load before the next load is issued.
659 define void @memcpy_p0_p4_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
660 ; CHECK-LABEL: memcpy_p0_p4_sz31_align_1_1:
661 ; CHECK: ; %bb.0: ; %entry
662 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
663 ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
664 ; CHECK-NEXT: s_waitcnt vmcnt(0)
665 ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
666 ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8
667 ; CHECK-NEXT: s_waitcnt vmcnt(0)
668 ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8
669 ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16
670 ; CHECK-NEXT: s_waitcnt vmcnt(0)
671 ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16
672 ; CHECK-NEXT: global_load_dword v4, v[2:3], off offset:24
673 ; CHECK-NEXT: s_waitcnt vmcnt(0)
674 ; CHECK-NEXT: flat_store_dword v[0:1], v4 offset:24
675 ; CHECK-NEXT: global_load_ushort v4, v[2:3], off offset:28
676 ; CHECK-NEXT: s_waitcnt vmcnt(0)
677 ; CHECK-NEXT: flat_store_short v[0:1], v4 offset:28
678 ; CHECK-NEXT: global_load_ubyte v2, v[2:3], off offset:30
679 ; CHECK-NEXT: s_waitcnt vmcnt(0)
680 ; CHECK-NEXT: flat_store_byte v[0:1], v2 offset:30
681 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
682 ; CHECK-NEXT: s_setpc_b64 s[30:31]
684 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false)
; Test case: 32-byte llvm.memcpy, addrspace(4) source -> addrspace(0) destination, both align 1.
; Expected assembly: four sequential global_load_dwordx2 / flat_store_dwordx2 pairs at
; offsets 0, 8, 16 and 24; each store waits for its own load before the next load is issued.
688 define void @memcpy_p0_p4_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
689 ; CHECK-LABEL: memcpy_p0_p4_sz32_align_1_1:
690 ; CHECK: ; %bb.0: ; %entry
691 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
692 ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
693 ; CHECK-NEXT: s_waitcnt vmcnt(0)
694 ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
695 ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8
696 ; CHECK-NEXT: s_waitcnt vmcnt(0)
697 ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8
698 ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16
699 ; CHECK-NEXT: s_waitcnt vmcnt(0)
700 ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16
701 ; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:24
702 ; CHECK-NEXT: s_waitcnt vmcnt(0)
703 ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:24
704 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
705 ; CHECK-NEXT: s_setpc_b64 s[30:31]
707 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false)
; Copies 16 bytes from an addrspace(4) source to an addrspace(0) destination; both pointers are align 2.
; Expected assembly: two global_load_dwordx2/flat_store_dwordx2 pairs at offsets 0 and 8, each load waited on with vmcnt(0) before its store.
711 define void @memcpy_p0_p4_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
712 ; CHECK-LABEL: memcpy_p0_p4_sz16_align_2_2:
713 ; CHECK: ; %bb.0: ; %entry
714 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
715 ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
716 ; CHECK-NEXT: s_waitcnt vmcnt(0)
717 ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
718 ; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:8
719 ; CHECK-NEXT: s_waitcnt vmcnt(0)
720 ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:8
721 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
722 ; CHECK-NEXT: s_setpc_b64 s[30:31]
724 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false)
; Copies 31 bytes from an addrspace(4) source to an addrspace(0) destination; both pointers are align 2.
; Expected assembly: six global_load/flat_store pairs (dwordx2 at offsets 0, 8, 16; dword at 24; ushort at 28; ubyte at 30), each load waited on with vmcnt(0) before its store.
728 define void @memcpy_p0_p4_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
729 ; CHECK-LABEL: memcpy_p0_p4_sz31_align_2_2:
730 ; CHECK: ; %bb.0: ; %entry
731 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
732 ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
733 ; CHECK-NEXT: s_waitcnt vmcnt(0)
734 ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
735 ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8
736 ; CHECK-NEXT: s_waitcnt vmcnt(0)
737 ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8
738 ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16
739 ; CHECK-NEXT: s_waitcnt vmcnt(0)
740 ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16
741 ; CHECK-NEXT: global_load_dword v4, v[2:3], off offset:24
742 ; CHECK-NEXT: s_waitcnt vmcnt(0)
743 ; CHECK-NEXT: flat_store_dword v[0:1], v4 offset:24
744 ; CHECK-NEXT: global_load_ushort v4, v[2:3], off offset:28
745 ; CHECK-NEXT: s_waitcnt vmcnt(0)
746 ; CHECK-NEXT: flat_store_short v[0:1], v4 offset:28
747 ; CHECK-NEXT: global_load_ubyte v2, v[2:3], off offset:30
748 ; CHECK-NEXT: s_waitcnt vmcnt(0)
749 ; CHECK-NEXT: flat_store_byte v[0:1], v2 offset:30
750 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
751 ; CHECK-NEXT: s_setpc_b64 s[30:31]
753 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false)
; Copies 32 bytes from an addrspace(4) source to an addrspace(0) destination; both pointers are align 2.
; Expected assembly: four global_load_dwordx2/flat_store_dwordx2 pairs at offsets 0, 8, 16 and 24, each load waited on with vmcnt(0) before its store.
757 define void @memcpy_p0_p4_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
758 ; CHECK-LABEL: memcpy_p0_p4_sz32_align_2_2:
759 ; CHECK: ; %bb.0: ; %entry
760 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
761 ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
762 ; CHECK-NEXT: s_waitcnt vmcnt(0)
763 ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
764 ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8
765 ; CHECK-NEXT: s_waitcnt vmcnt(0)
766 ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8
767 ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16
768 ; CHECK-NEXT: s_waitcnt vmcnt(0)
769 ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16
770 ; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:24
771 ; CHECK-NEXT: s_waitcnt vmcnt(0)
772 ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:24
773 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
774 ; CHECK-NEXT: s_setpc_b64 s[30:31]
776 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false)
; Copies 16 bytes from an addrspace(4) source to an addrspace(0) destination; both pointers are align 8.
; Expected assembly: a single global_load_dwordx4 followed, after vmcnt(0), by a single flat_store_dwordx4.
780 define void @memcpy_p0_p4_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
781 ; CHECK-LABEL: memcpy_p0_p4_sz16_align_8_8:
782 ; CHECK: ; %bb.0: ; %entry
783 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
784 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
785 ; CHECK-NEXT: s_waitcnt vmcnt(0)
786 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
787 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
788 ; CHECK-NEXT: s_setpc_b64 s[30:31]
790 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false)
; Copies 31 bytes from an addrspace(4) source to an addrspace(0) destination; both pointers are align 8.
; Expected assembly: two global_load_dwordx4/flat_store_dwordx4 pairs at offsets 0 and 15; the second 16-byte access overlaps the first at byte 15 so that bytes 0..30 are covered.
794 define void @memcpy_p0_p4_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
795 ; CHECK-LABEL: memcpy_p0_p4_sz31_align_8_8:
796 ; CHECK: ; %bb.0: ; %entry
797 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
798 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
799 ; CHECK-NEXT: s_waitcnt vmcnt(0)
800 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
801 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:15
802 ; CHECK-NEXT: s_waitcnt vmcnt(0)
803 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:15
804 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
805 ; CHECK-NEXT: s_setpc_b64 s[30:31]
807 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false)
; Copies 32 bytes from an addrspace(4) source to an addrspace(0) destination; both pointers are align 8.
; Expected assembly: two global_load_dwordx4/flat_store_dwordx4 pairs at offsets 0 and 16, each load waited on with vmcnt(0) before its store.
811 define void @memcpy_p0_p4_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
812 ; CHECK-LABEL: memcpy_p0_p4_sz32_align_8_8:
813 ; CHECK: ; %bb.0: ; %entry
814 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
815 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
816 ; CHECK-NEXT: s_waitcnt vmcnt(0)
817 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
818 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:16
819 ; CHECK-NEXT: s_waitcnt vmcnt(0)
820 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:16
821 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
822 ; CHECK-NEXT: s_setpc_b64 s[30:31]
824 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false)
; Copies 16 bytes from an addrspace(4) source to an addrspace(0) destination; both pointers are align 16.
; Expected assembly: a single global_load_dwordx4 followed, after vmcnt(0), by a single flat_store_dwordx4.
828 define void @memcpy_p0_p4_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
829 ; CHECK-LABEL: memcpy_p0_p4_sz16_align_16_16:
830 ; CHECK: ; %bb.0: ; %entry
831 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
832 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
833 ; CHECK-NEXT: s_waitcnt vmcnt(0)
834 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
835 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
836 ; CHECK-NEXT: s_setpc_b64 s[30:31]
838 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false)
; Copies 31 bytes from an addrspace(4) source to an addrspace(0) destination; both pointers are align 16.
; Expected assembly: two global_load_dwordx4/flat_store_dwordx4 pairs at offsets 0 and 15; the second 16-byte access overlaps the first at byte 15 so that bytes 0..30 are covered.
842 define void @memcpy_p0_p4_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
843 ; CHECK-LABEL: memcpy_p0_p4_sz31_align_16_16:
844 ; CHECK: ; %bb.0: ; %entry
845 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
846 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
847 ; CHECK-NEXT: s_waitcnt vmcnt(0)
848 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
849 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:15
850 ; CHECK-NEXT: s_waitcnt vmcnt(0)
851 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:15
852 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
853 ; CHECK-NEXT: s_setpc_b64 s[30:31]
855 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false)
; Copies 32 bytes from an addrspace(4) source to an addrspace(0) destination; both pointers are align 16.
; Expected assembly: two global_load_dwordx4/flat_store_dwordx4 pairs at offsets 0 and 16, each load waited on with vmcnt(0) before its store.
859 define void @memcpy_p0_p4_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
860 ; CHECK-LABEL: memcpy_p0_p4_sz32_align_16_16:
861 ; CHECK: ; %bb.0: ; %entry
862 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
863 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
864 ; CHECK-NEXT: s_waitcnt vmcnt(0)
865 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
866 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:16
867 ; CHECK-NEXT: s_waitcnt vmcnt(0)
868 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:16
869 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
870 ; CHECK-NEXT: s_setpc_b64 s[30:31]
872 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false)
; Copies 16 bytes from an addrspace(5) source to an addrspace(0) destination; both pointers are align 1.
; Expected assembly: one clause of four buffer_load_dword (offsets 0, 4, 8, 12) into v3..v6, then a single flat_store_dwordx4 after vmcnt(0).
876 define void @memcpy_p0_p5_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
877 ; CHECK-LABEL: memcpy_p0_p5_sz16_align_1_1:
878 ; CHECK: ; %bb.0: ; %entry
879 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
880 ; CHECK-NEXT: s_clause 0x3
881 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
882 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
883 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
884 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
885 ; CHECK-NEXT: s_waitcnt vmcnt(0)
886 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
887 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
888 ; CHECK-NEXT: s_setpc_b64 s[30:31]
890 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false)
; Copies 31 bytes from an addrspace(5) source to an addrspace(0) destination; both pointers are align 1.
; Expected assembly: one clause of nine buffer loads (dwords at 16, 20, 24; ushort at 28; ubyte at 30; dwords at 0, 4, 8, 12), then flat stores of short@28, byte@30, dwordx3@16 and dwordx4@0 gated by decreasing vmcnt waits.
894 define void @memcpy_p0_p5_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
895 ; CHECK-LABEL: memcpy_p0_p5_sz31_align_1_1:
896 ; CHECK: ; %bb.0: ; %entry
897 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
898 ; CHECK-NEXT: s_clause 0x8
899 ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
900 ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
901 ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
902 ; CHECK-NEXT: buffer_load_ushort v10, v2, s[0:3], 0 offen offset:28
903 ; CHECK-NEXT: buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:30
904 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
905 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
906 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
907 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
908 ; CHECK-NEXT: s_waitcnt vmcnt(5)
909 ; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
910 ; CHECK-NEXT: s_waitcnt vmcnt(4)
911 ; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:30
912 ; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[7:9] offset:16
913 ; CHECK-NEXT: s_waitcnt vmcnt(0)
914 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
915 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
916 ; CHECK-NEXT: s_setpc_b64 s[30:31]
918 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false)
; Copies 32 bytes from an addrspace(5) source to an addrspace(0) destination; both pointers are align 1.
; Expected assembly: one clause of eight buffer_load_dword (offsets 16..28 first, then 0..12), then flat_store_dwordx4 at offset 16 after vmcnt(4) and at offset 0 after vmcnt(0).
922 define void @memcpy_p0_p5_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
923 ; CHECK-LABEL: memcpy_p0_p5_sz32_align_1_1:
924 ; CHECK: ; %bb.0: ; %entry
925 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
926 ; CHECK-NEXT: s_clause 0x7
927 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:16
928 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:20
929 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:24
930 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:28
931 ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen
932 ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:4
933 ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:8
934 ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:12
935 ; CHECK-NEXT: s_waitcnt vmcnt(4)
936 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16
937 ; CHECK-NEXT: s_waitcnt vmcnt(0)
938 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
939 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
940 ; CHECK-NEXT: s_setpc_b64 s[30:31]
942 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false)
; Copies 16 bytes from an addrspace(5) source to an addrspace(0) destination; both pointers are align 2.
; Expected assembly: one clause of four buffer_load_dword (offsets 0, 4, 8, 12) into v3..v6, then a single flat_store_dwordx4 after vmcnt(0).
946 define void @memcpy_p0_p5_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
947 ; CHECK-LABEL: memcpy_p0_p5_sz16_align_2_2:
948 ; CHECK: ; %bb.0: ; %entry
949 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
950 ; CHECK-NEXT: s_clause 0x3
951 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
952 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
953 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
954 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
955 ; CHECK-NEXT: s_waitcnt vmcnt(0)
956 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
957 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
958 ; CHECK-NEXT: s_setpc_b64 s[30:31]
960 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false)
; Copies 31 bytes from an addrspace(5) source to an addrspace(0) destination; both pointers are align 2.
; Expected assembly: one clause of nine buffer loads (dwords at 16, 20, 24; ushort at 28; ubyte at 30; dwords at 0, 4, 8, 12), then flat stores of short@28, byte@30, dwordx3@16 and dwordx4@0 gated by decreasing vmcnt waits.
964 define void @memcpy_p0_p5_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
965 ; CHECK-LABEL: memcpy_p0_p5_sz31_align_2_2:
966 ; CHECK: ; %bb.0: ; %entry
967 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
968 ; CHECK-NEXT: s_clause 0x8
969 ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
970 ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
971 ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
972 ; CHECK-NEXT: buffer_load_ushort v10, v2, s[0:3], 0 offen offset:28
973 ; CHECK-NEXT: buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:30
974 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
975 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
976 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
977 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
978 ; CHECK-NEXT: s_waitcnt vmcnt(5)
979 ; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
980 ; CHECK-NEXT: s_waitcnt vmcnt(4)
981 ; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:30
982 ; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[7:9] offset:16
983 ; CHECK-NEXT: s_waitcnt vmcnt(0)
984 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
985 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
986 ; CHECK-NEXT: s_setpc_b64 s[30:31]
988 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false)
; Copies 32 bytes from an addrspace(5) source to an addrspace(0) destination; both pointers are align 2.
; Expected assembly: one clause of eight buffer_load_dword (offsets 16..28 first, then 0..12), then flat_store_dwordx4 at offset 16 after vmcnt(4) and at offset 0 after vmcnt(0).
992 define void @memcpy_p0_p5_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
993 ; CHECK-LABEL: memcpy_p0_p5_sz32_align_2_2:
994 ; CHECK: ; %bb.0: ; %entry
995 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
996 ; CHECK-NEXT: s_clause 0x7
997 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:16
998 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:20
999 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:24
1000 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:28
1001 ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen
1002 ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:4
1003 ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:8
1004 ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:12
1005 ; CHECK-NEXT: s_waitcnt vmcnt(4)
1006 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16
1007 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1008 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
1009 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1010 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1012 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false)
; Copies 16 bytes from an addrspace(5) source to an addrspace(0) destination; both pointers are align 8.
; Expected assembly: one clause of four buffer_load_dword (offsets 0, 4, 8, 12) into v3..v6, then a single flat_store_dwordx4 after vmcnt(0).
1016 define void @memcpy_p0_p5_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
1017 ; CHECK-LABEL: memcpy_p0_p5_sz16_align_8_8:
1018 ; CHECK: ; %bb.0: ; %entry
1019 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1020 ; CHECK-NEXT: s_clause 0x3
1021 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
1022 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1023 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1024 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1025 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1026 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
1027 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1028 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1030 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false)
; Copies 31 bytes from an addrspace(5) source to an addrspace(0) destination; both pointers are align 8.
; Expected assembly: one clause of eight buffer_load_dword (offsets 0, 4, 8, 12 and 15, 19, 23, 27), then flat_store_dwordx4 at offset 0 and at offset 15; the two 16-byte stores overlap at byte 15.
1034 define void @memcpy_p0_p5_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
1035 ; CHECK-LABEL: memcpy_p0_p5_sz31_align_8_8:
1036 ; CHECK: ; %bb.0: ; %entry
1037 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1038 ; CHECK-NEXT: s_clause 0x7
1039 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
1040 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1041 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1042 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1043 ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:15
1044 ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:19
1045 ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
1046 ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
1047 ; CHECK-NEXT: s_waitcnt vmcnt(4)
1048 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
1049 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1050 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] offset:15
1051 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1052 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1054 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false)
; Copies 32 bytes from an addrspace(5) source to an addrspace(0) destination; both pointers are align 8.
; Expected assembly: one clause of eight buffer_load_dword (offsets 0..28 in ascending order), then flat_store_dwordx4 at offset 0 after vmcnt(4) and at offset 16 after vmcnt(0).
1058 define void @memcpy_p0_p5_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
1059 ; CHECK-LABEL: memcpy_p0_p5_sz32_align_8_8:
1060 ; CHECK: ; %bb.0: ; %entry
1061 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1062 ; CHECK-NEXT: s_clause 0x7
1063 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
1064 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1065 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1066 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1067 ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
1068 ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
1069 ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
1070 ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
1071 ; CHECK-NEXT: s_waitcnt vmcnt(4)
1072 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
1073 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1074 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] offset:16
1075 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1076 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1078 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false)
; Copies 16 bytes from an addrspace(5) source to an addrspace(0) destination; both pointers are align 16.
; Expected assembly: one clause of four buffer_load_dword (offsets 0, 4, 8, 12) into v3..v6, then a single flat_store_dwordx4 after vmcnt(0).
1082 define void @memcpy_p0_p5_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
1083 ; CHECK-LABEL: memcpy_p0_p5_sz16_align_16_16:
1084 ; CHECK: ; %bb.0: ; %entry
1085 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1086 ; CHECK-NEXT: s_clause 0x3
1087 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
1088 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1089 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1090 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1091 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1092 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
1093 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1094 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1096 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false)
; Copies 31 bytes from an addrspace(5) source to an addrspace(0) destination; both pointers are align 16.
; Expected assembly: one clause of eight buffer_load_dword (offsets 0, 4, 8, 12 and 15, 19, 23, 27), then flat_store_dwordx4 at offset 0 and at offset 15; the two 16-byte stores overlap at byte 15.
1100 define void @memcpy_p0_p5_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
1101 ; CHECK-LABEL: memcpy_p0_p5_sz31_align_16_16:
1102 ; CHECK: ; %bb.0: ; %entry
1103 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1104 ; CHECK-NEXT: s_clause 0x7
1105 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
1106 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1107 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1108 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1109 ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:15
1110 ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:19
1111 ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
1112 ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
1113 ; CHECK-NEXT: s_waitcnt vmcnt(4)
1114 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
1115 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1116 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] offset:15
1117 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1118 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1120 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false)
; Copies 32 bytes from an addrspace(5) source to an addrspace(0) destination; both pointers are align 16.
; Expected assembly: one clause of eight buffer_load_dword (offsets 0..28 in ascending order), then flat_store_dwordx4 at offset 0 after vmcnt(4) and at offset 16 after vmcnt(0).
1124 define void @memcpy_p0_p5_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
1125 ; CHECK-LABEL: memcpy_p0_p5_sz32_align_16_16:
1126 ; CHECK: ; %bb.0: ; %entry
1127 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1128 ; CHECK-NEXT: s_clause 0x7
1129 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
1130 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1131 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1132 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1133 ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
1134 ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
1135 ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
1136 ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
1137 ; CHECK-NEXT: s_waitcnt vmcnt(4)
1138 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
1139 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1140 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] offset:16
1141 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1142 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1144 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false)
; Copies 16 bytes from an addrspace(0) source to an addrspace(1) destination; both pointers are align 1.
; Expected assembly: a single flat_load_dwordx4 followed, after waiting on both vmcnt and lgkmcnt, by a single global_store_dwordx4.
1148 define void @memcpy_p1_p0_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
1149 ; CHECK-LABEL: memcpy_p1_p0_sz16_align_1_1:
1150 ; CHECK: ; %bb.0: ; %entry
1151 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1152 ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
1153 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1154 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
1155 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1157 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false)
; Copies 31 bytes from an addrspace(0) source to an addrspace(1) destination; both pointers are align 1.
; Expected assembly: one clause of three flat loads (dwordx2 at 23, dwordx2 at 16, dwordx4 at 0), then matching global stores in the same order; the two dwordx2 accesses overlap at byte 23.
1161 define void @memcpy_p1_p0_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
1162 ; CHECK-LABEL: memcpy_p1_p0_sz31_align_1_1:
1163 ; CHECK: ; %bb.0: ; %entry
1164 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1165 ; CHECK-NEXT: s_clause 0x2
1166 ; CHECK-NEXT: flat_load_dwordx2 v[6:7], v[2:3] offset:23
1167 ; CHECK-NEXT: flat_load_dwordx2 v[8:9], v[2:3] offset:16
1168 ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
1169 ; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
1170 ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:23
1171 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
1172 ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:16
1173 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1174 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
1175 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1177 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false)
; Copies 32 bytes from an addrspace(0) source to an addrspace(1) destination; both pointers are align 1.
; Expected assembly: one clause of two flat_load_dwordx4 (offset 16, then 0), then global_store_dwordx4 at offset 16 and at offset 0, each gated by its own wait.
1181 define void @memcpy_p1_p0_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
1182 ; CHECK-LABEL: memcpy_p1_p0_sz32_align_1_1:
1183 ; CHECK: ; %bb.0: ; %entry
1184 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1185 ; CHECK-NEXT: s_clause 0x1
1186 ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
1187 ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
1188 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
1189 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
1190 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1191 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
1192 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1194 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false)
; Copies 16 bytes from an addrspace(0) source to an addrspace(1) destination; both pointers are align 2.
; Expected assembly: a single flat_load_dwordx4 followed, after waiting on both vmcnt and lgkmcnt, by a single global_store_dwordx4.
1198 define void @memcpy_p1_p0_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
1199 ; CHECK-LABEL: memcpy_p1_p0_sz16_align_2_2:
1200 ; CHECK: ; %bb.0: ; %entry
1201 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1202 ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
1203 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1204 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
1205 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1207 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false)
; Copies 31 bytes from an addrspace(0) source to an addrspace(1) destination; both pointers are align 2.
; Expected assembly: one clause of three flat loads (dwordx2 at 23, dwordx2 at 16, dwordx4 at 0), then matching global stores in the same order; the two dwordx2 accesses overlap at byte 23.
1211 define void @memcpy_p1_p0_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
1212 ; CHECK-LABEL: memcpy_p1_p0_sz31_align_2_2:
1213 ; CHECK: ; %bb.0: ; %entry
1214 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1215 ; CHECK-NEXT: s_clause 0x2
1216 ; CHECK-NEXT: flat_load_dwordx2 v[6:7], v[2:3] offset:23
1217 ; CHECK-NEXT: flat_load_dwordx2 v[8:9], v[2:3] offset:16
1218 ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
1219 ; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
1220 ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:23
1221 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
1222 ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:16
1223 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1224 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
1225 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1227 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false)
; Copies 32 bytes from an addrspace(0) source to an addrspace(1) destination; both pointers are align 2.
; Expected assembly: one clause of two flat_load_dwordx4 (offset 16, then 0), then global_store_dwordx4 at offset 16 and at offset 0, each gated by its own wait.
1231 define void @memcpy_p1_p0_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
1232 ; CHECK-LABEL: memcpy_p1_p0_sz32_align_2_2:
1233 ; CHECK: ; %bb.0: ; %entry
1234 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1235 ; CHECK-NEXT: s_clause 0x1
1236 ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
1237 ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
1238 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
1239 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
1240 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1241 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
1242 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1244 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false)
; Copies 16 bytes from an addrspace(0) source to an addrspace(1) destination; both pointers are align 8.
; Expected assembly: a single flat_load_dwordx4 followed, after waiting on both vmcnt and lgkmcnt, by a single global_store_dwordx4.
1248 define void @memcpy_p1_p0_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
1249 ; CHECK-LABEL: memcpy_p1_p0_sz16_align_8_8:
1250 ; CHECK: ; %bb.0: ; %entry
1251 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1252 ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
1253 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1254 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
1255 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1257 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false)
; Copies 31 bytes from an addrspace(0) source to an addrspace(1) destination; both pointers are align 8.
; Expected assembly: one clause of two flat_load_dwordx4 (offset 15, then 0), then global_store_dwordx4 at offset 15 and at offset 0; the two 16-byte accesses overlap at byte 15.
1261 define void @memcpy_p1_p0_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
1262 ; CHECK-LABEL: memcpy_p1_p0_sz31_align_8_8:
1263 ; CHECK: ; %bb.0: ; %entry
1264 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1265 ; CHECK-NEXT: s_clause 0x1
1266 ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:15
1267 ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
1268 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
1269 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:15
1270 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1271 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
1272 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1274 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false)
; Copies 32 bytes from an addrspace(0) source to an addrspace(1) destination; both pointers are align 8.
; Expected assembly: one clause of two flat_load_dwordx4 (offset 16, then 0), then global_store_dwordx4 at offset 16 and at offset 0, each gated by its own wait.
1278 define void @memcpy_p1_p0_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
1279 ; CHECK-LABEL: memcpy_p1_p0_sz32_align_8_8:
1280 ; CHECK: ; %bb.0: ; %entry
1281 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1282 ; CHECK-NEXT: s_clause 0x1
1283 ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
1284 ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
1285 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
1286 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
1287 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1288 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
1289 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1291 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false)
; Copies 16 bytes from an addrspace(0) source to an addrspace(1) destination; both pointers are align 16.
; Expected assembly: a single flat_load_dwordx4 followed, after waiting on both vmcnt and lgkmcnt, by a single global_store_dwordx4.
1295 define void @memcpy_p1_p0_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
1296 ; CHECK-LABEL: memcpy_p1_p0_sz16_align_16_16:
1297 ; CHECK: ; %bb.0: ; %entry
1298 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1299 ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
1300 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1301 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
1302 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1304 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false)
; Copies 31 bytes from an addrspace(0) source to an addrspace(1) destination; both pointers are align 16.
; Expected assembly: one clause of two flat_load_dwordx4 (offset 15, then 0), then global_store_dwordx4 at offset 15 and at offset 0; the two 16-byte accesses overlap at byte 15.
1308 define void @memcpy_p1_p0_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
1309 ; CHECK-LABEL: memcpy_p1_p0_sz31_align_16_16:
1310 ; CHECK: ; %bb.0: ; %entry
1311 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1312 ; CHECK-NEXT: s_clause 0x1
1313 ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:15
1314 ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
1315 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
1316 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:15
1317 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1318 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
1319 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1321 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false)
; Copies 32 bytes from an addrspace(0) source to an addrspace(1) destination; both pointers are align 16.
; Expected assembly: one clause of two flat_load_dwordx4 (offset 16, then 0), then global_store_dwordx4 at offset 16 and at offset 0, each gated by its own wait.
1325 define void @memcpy_p1_p0_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
1326 ; CHECK-LABEL: memcpy_p1_p0_sz32_align_16_16:
1327 ; CHECK: ; %bb.0: ; %entry
1328 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1329 ; CHECK-NEXT: s_clause 0x1
1330 ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
1331 ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
1332 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
1333 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
1334 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1335 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
1336 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1338 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false)
; 16-byte memcpy within addrspace(1), both pointers align 1.
; Expected output: a single global_load_dwordx4 / global_store_dwordx4 pair;
; the copy is not split into narrower accesses despite the align 1 operands.
1342 define void @memcpy_p1_p1_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
1343 ; CHECK-LABEL: memcpy_p1_p1_sz16_align_1_1:
1344 ; CHECK: ; %bb.0: ; %entry
1345 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1346 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
1347 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1348 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
1349 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1351 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false)
; 31-byte memcpy within addrspace(1), both pointers align 1.
; Expected output: three global load/store pairs: dwordx4 at offset 0 plus
; dwordx2 at offsets 16 and 23 (the two dwordx2 accesses overlap by one byte).
1355 define void @memcpy_p1_p1_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
1356 ; CHECK-LABEL: memcpy_p1_p1_sz31_align_1_1:
1357 ; CHECK: ; %bb.0: ; %entry
1358 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1359 ; CHECK-NEXT: s_clause 0x2
1360 ; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off offset:23
1361 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
1362 ; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:16
1363 ; CHECK-NEXT: s_waitcnt vmcnt(2)
1364 ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:23
1365 ; CHECK-NEXT: s_waitcnt vmcnt(1)
1366 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
1367 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1368 ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:16
1369 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1371 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false)
; 32-byte memcpy within addrspace(1), both pointers align 1.
; Expected output: two global dwordx4 load/store pairs at offsets 16 and 0.
1375 define void @memcpy_p1_p1_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
1376 ; CHECK-LABEL: memcpy_p1_p1_sz32_align_1_1:
1377 ; CHECK: ; %bb.0: ; %entry
1378 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1379 ; CHECK-NEXT: s_clause 0x1
1380 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
1381 ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
1382 ; CHECK-NEXT: s_waitcnt vmcnt(1)
1383 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
1384 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1385 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
1386 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1388 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false)
; 16-byte memcpy within addrspace(1), both pointers align 2.
; Expected output: a single global_load_dwordx4 / global_store_dwordx4 pair.
1392 define void @memcpy_p1_p1_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
1393 ; CHECK-LABEL: memcpy_p1_p1_sz16_align_2_2:
1394 ; CHECK: ; %bb.0: ; %entry
1395 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1396 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
1397 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1398 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
1399 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1401 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false)
; 31-byte memcpy within addrspace(1), both pointers align 2.
; Expected output: three global load/store pairs: dwordx4 at offset 0 plus
; dwordx2 at offsets 16 and 23 (the two dwordx2 accesses overlap by one byte).
1405 define void @memcpy_p1_p1_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
1406 ; CHECK-LABEL: memcpy_p1_p1_sz31_align_2_2:
1407 ; CHECK: ; %bb.0: ; %entry
1408 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1409 ; CHECK-NEXT: s_clause 0x2
1410 ; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off offset:23
1411 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
1412 ; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:16
1413 ; CHECK-NEXT: s_waitcnt vmcnt(2)
1414 ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:23
1415 ; CHECK-NEXT: s_waitcnt vmcnt(1)
1416 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
1417 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1418 ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:16
1419 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1421 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false)
; 32-byte memcpy within addrspace(1), both pointers align 2.
; Expected output: two global dwordx4 load/store pairs at offsets 16 and 0.
1425 define void @memcpy_p1_p1_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
1426 ; CHECK-LABEL: memcpy_p1_p1_sz32_align_2_2:
1427 ; CHECK: ; %bb.0: ; %entry
1428 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1429 ; CHECK-NEXT: s_clause 0x1
1430 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
1431 ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
1432 ; CHECK-NEXT: s_waitcnt vmcnt(1)
1433 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
1434 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1435 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
1436 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1438 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false)
; 16-byte memcpy within addrspace(1), both pointers align 8.
; Expected output: a single global_load_dwordx4 / global_store_dwordx4 pair.
1442 define void @memcpy_p1_p1_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
1443 ; CHECK-LABEL: memcpy_p1_p1_sz16_align_8_8:
1444 ; CHECK: ; %bb.0: ; %entry
1445 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1446 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
1447 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1448 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
1449 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1451 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false)
; 31-byte memcpy within addrspace(1), both pointers align 8.
; Expected output: two global dwordx4 load/store pairs at offsets 15 and 0;
; the two 16-byte accesses overlap by one byte to cover the odd size.
1455 define void @memcpy_p1_p1_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
1456 ; CHECK-LABEL: memcpy_p1_p1_sz31_align_8_8:
1457 ; CHECK: ; %bb.0: ; %entry
1458 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1459 ; CHECK-NEXT: s_clause 0x1
1460 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:15
1461 ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
1462 ; CHECK-NEXT: s_waitcnt vmcnt(1)
1463 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:15
1464 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1465 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
1466 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1468 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false)
; 32-byte memcpy within addrspace(1), both pointers align 8.
; Expected output: two global dwordx4 load/store pairs at offsets 16 and 0.
1472 define void @memcpy_p1_p1_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
1473 ; CHECK-LABEL: memcpy_p1_p1_sz32_align_8_8:
1474 ; CHECK: ; %bb.0: ; %entry
1475 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1476 ; CHECK-NEXT: s_clause 0x1
1477 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
1478 ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
1479 ; CHECK-NEXT: s_waitcnt vmcnt(1)
1480 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
1481 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1482 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
1483 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1485 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false)
; 16-byte memcpy within addrspace(1), both pointers align 16.
; Expected output: a single global_load_dwordx4 / global_store_dwordx4 pair.
1489 define void @memcpy_p1_p1_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
1490 ; CHECK-LABEL: memcpy_p1_p1_sz16_align_16_16:
1491 ; CHECK: ; %bb.0: ; %entry
1492 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1493 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
1494 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1495 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
1496 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1498 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false)
; 31-byte memcpy within addrspace(1), both pointers align 16.
; Expected output: two global dwordx4 load/store pairs at offsets 15 and 0;
; the two 16-byte accesses overlap by one byte to cover the odd size.
1502 define void @memcpy_p1_p1_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
1503 ; CHECK-LABEL: memcpy_p1_p1_sz31_align_16_16:
1504 ; CHECK: ; %bb.0: ; %entry
1505 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1506 ; CHECK-NEXT: s_clause 0x1
1507 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:15
1508 ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
1509 ; CHECK-NEXT: s_waitcnt vmcnt(1)
1510 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:15
1511 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1512 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
1513 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1515 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false)
; 32-byte memcpy within addrspace(1), both pointers align 16.
; Expected output: two global dwordx4 load/store pairs at offsets 16 and 0.
1519 define void @memcpy_p1_p1_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
1520 ; CHECK-LABEL: memcpy_p1_p1_sz32_align_16_16:
1521 ; CHECK: ; %bb.0: ; %entry
1522 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1523 ; CHECK-NEXT: s_clause 0x1
1524 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
1525 ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
1526 ; CHECK-NEXT: s_waitcnt vmcnt(1)
1527 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
1528 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1529 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
1530 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1532 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false)
; 16-byte memcpy from addrspace(3) to addrspace(1), both pointers align 1.
; Expected output: one ds_read_b128 followed by one global_store_dwordx4.
1536 define void @memcpy_p1_p3_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
1537 ; CHECK-LABEL: memcpy_p1_p3_sz16_align_1_1:
1538 ; CHECK: ; %bb.0: ; %entry
1539 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1540 ; CHECK-NEXT: ds_read_b128 v[2:5], v2
1541 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1542 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
1543 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1545 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false)
; 31-byte memcpy from addrspace(3) to addrspace(1), both pointers align 1.
; Expected output: ds_read_b64 at offset 0, ds_read_b128 at offset 8 and
; ds_read_b64 at offset 23, each followed by a global store of the same width
; at the same offset (the last two pieces overlap by one byte).
1549 define void @memcpy_p1_p3_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
1550 ; CHECK-LABEL: memcpy_p1_p3_sz31_align_1_1:
1551 ; CHECK: ; %bb.0: ; %entry
1552 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1553 ; CHECK-NEXT: ds_read_b64 v[7:8], v2
1554 ; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:8
1555 ; CHECK-NEXT: ds_read_b64 v[9:10], v2 offset:23
1556 ; CHECK-NEXT: s_waitcnt lgkmcnt(2)
1557 ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off
1558 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
1559 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:8
1560 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1561 ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off offset:23
1562 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1564 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false)
; 32-byte memcpy from addrspace(3) to addrspace(1), both pointers align 1.
; Expected output: two ds_read_b128 at offsets 0 and 16, each followed by a
; global_store_dwordx4 at the same offset.
1568 define void @memcpy_p1_p3_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
1569 ; CHECK-LABEL: memcpy_p1_p3_sz32_align_1_1:
1570 ; CHECK: ; %bb.0: ; %entry
1571 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1572 ; CHECK-NEXT: ds_read_b128 v[3:6], v2
1573 ; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:16
1574 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
1575 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
1576 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1577 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
1578 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1580 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false)
; 16-byte memcpy from addrspace(3) to addrspace(1), both pointers align 2.
; Expected output: one ds_read_b128 followed by one global_store_dwordx4.
1584 define void @memcpy_p1_p3_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
1585 ; CHECK-LABEL: memcpy_p1_p3_sz16_align_2_2:
1586 ; CHECK: ; %bb.0: ; %entry
1587 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1588 ; CHECK-NEXT: ds_read_b128 v[2:5], v2
1589 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1590 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
1591 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1593 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false)
; 31-byte memcpy from addrspace(3) to addrspace(1), both pointers align 2.
; Expected output: ds_read_b64 at offset 0, ds_read_b128 at offset 8 and
; ds_read_b64 at offset 23, each followed by a global store of the same width
; at the same offset (the last two pieces overlap by one byte).
1597 define void @memcpy_p1_p3_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
1598 ; CHECK-LABEL: memcpy_p1_p3_sz31_align_2_2:
1599 ; CHECK: ; %bb.0: ; %entry
1600 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1601 ; CHECK-NEXT: ds_read_b64 v[7:8], v2
1602 ; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:8
1603 ; CHECK-NEXT: ds_read_b64 v[9:10], v2 offset:23
1604 ; CHECK-NEXT: s_waitcnt lgkmcnt(2)
1605 ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off
1606 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
1607 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:8
1608 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1609 ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off offset:23
1610 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1612 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false)
; 32-byte memcpy from addrspace(3) to addrspace(1), both pointers align 2.
; Expected output: two ds_read_b128 at offsets 0 and 16, each followed by a
; global_store_dwordx4 at the same offset.
1616 define void @memcpy_p1_p3_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
1617 ; CHECK-LABEL: memcpy_p1_p3_sz32_align_2_2:
1618 ; CHECK: ; %bb.0: ; %entry
1619 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1620 ; CHECK-NEXT: ds_read_b128 v[3:6], v2
1621 ; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:16
1622 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
1623 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
1624 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1625 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
1626 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1628 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false)
; 16-byte memcpy from addrspace(3) to addrspace(1), both pointers align 8.
; Expected output: the source is read with ds_read2_b64 (offset1:1) rather
; than ds_read_b128, then written with one global_store_dwordx4.
1632 define void @memcpy_p1_p3_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
1633 ; CHECK-LABEL: memcpy_p1_p3_sz16_align_8_8:
1634 ; CHECK: ; %bb.0: ; %entry
1635 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1636 ; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1
1637 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1638 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
1639 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1641 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false)
; 31-byte memcpy from addrspace(3) to addrspace(1), both pointers align 8.
; Expected output: ds_read2_b64 (offset1:1) for the first 16 bytes and
; ds_read_b128 at offset 15 for the tail, stored with global_store_dwordx4 at
; offsets 0 and 15.
1645 define void @memcpy_p1_p3_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
1646 ; CHECK-LABEL: memcpy_p1_p3_sz31_align_8_8:
1647 ; CHECK: ; %bb.0: ; %entry
1648 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1649 ; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset1:1
1650 ; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:15
1651 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
1652 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
1653 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1654 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:15
1655 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1657 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false)
; 32-byte memcpy from addrspace(3) to addrspace(1), both pointers align 8.
; Expected output: two ds_read2_b64 (offset1:1, then offset0:2 offset1:3),
; stored with global_store_dwordx4 at offsets 0 and 16.
1661 define void @memcpy_p1_p3_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
1662 ; CHECK-LABEL: memcpy_p1_p3_sz32_align_8_8:
1663 ; CHECK: ; %bb.0: ; %entry
1664 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1665 ; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset1:1
1666 ; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset0:2 offset1:3
1667 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
1668 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
1669 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1670 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
1671 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1673 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false)
; 16-byte memcpy from addrspace(3) to addrspace(1), both pointers align 16.
; Expected output: one ds_read_b128 followed by one global_store_dwordx4.
1677 define void @memcpy_p1_p3_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
1678 ; CHECK-LABEL: memcpy_p1_p3_sz16_align_16_16:
1679 ; CHECK: ; %bb.0: ; %entry
1680 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1681 ; CHECK-NEXT: ds_read_b128 v[2:5], v2
1682 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1683 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
1684 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1686 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false)
; 31-byte memcpy from addrspace(3) to addrspace(1), both pointers align 16.
; Expected output: two ds_read_b128 at offsets 0 and 15 (overlapping by one
; byte), each followed by a global_store_dwordx4 at the same offset.
1690 define void @memcpy_p1_p3_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
1691 ; CHECK-LABEL: memcpy_p1_p3_sz31_align_16_16:
1692 ; CHECK: ; %bb.0: ; %entry
1693 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1694 ; CHECK-NEXT: ds_read_b128 v[3:6], v2
1695 ; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:15
1696 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
1697 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
1698 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1699 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:15
1700 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1702 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false)
; 32-byte memcpy from addrspace(3) to addrspace(1), both pointers align 16.
; Expected output: two ds_read_b128 at offsets 0 and 16, each followed by a
; global_store_dwordx4 at the same offset.
1706 define void @memcpy_p1_p3_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
1707 ; CHECK-LABEL: memcpy_p1_p3_sz32_align_16_16:
1708 ; CHECK: ; %bb.0: ; %entry
1709 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1710 ; CHECK-NEXT: ds_read_b128 v[3:6], v2
1711 ; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:16
1712 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
1713 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
1714 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1715 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
1716 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1718 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false)
; 16-byte memcpy from addrspace(4) to addrspace(1), both pointers align 1.
; Expected output: the addrspace(4) source is read with global_load_dwordx4
; and written with one global_store_dwordx4.
1722 define void @memcpy_p1_p4_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
1723 ; CHECK-LABEL: memcpy_p1_p4_sz16_align_1_1:
1724 ; CHECK: ; %bb.0: ; %entry
1725 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1726 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
1727 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1728 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
1729 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1731 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
; 31-byte memcpy from addrspace(4) to addrspace(1), both pointers align 1.
; Expected output: dwordx2 at offset 0 and dwordx4 at offset 8 are loaded in
; one clause and stored; the trailing dwordx2 at offset 23 is loaded and
; stored only after those two stores.
1735 define void @memcpy_p1_p4_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
1736 ; CHECK-LABEL: memcpy_p1_p4_sz31_align_1_1:
1737 ; CHECK: ; %bb.0: ; %entry
1738 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1739 ; CHECK-NEXT: s_clause 0x1
1740 ; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off
1741 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:8
1742 ; CHECK-NEXT: s_waitcnt vmcnt(1)
1743 ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off
1744 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1745 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:8
1746 ; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:23
1747 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1748 ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:23
1749 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1751 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false)
; 32-byte memcpy from addrspace(4) to addrspace(1), both pointers align 1.
; Expected output: two global_load_dwordx4 in one clause (offsets 0 and 16),
; then two global_store_dwordx4 at the same offsets.
1755 define void @memcpy_p1_p4_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
1756 ; CHECK-LABEL: memcpy_p1_p4_sz32_align_1_1:
1757 ; CHECK: ; %bb.0: ; %entry
1758 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1759 ; CHECK-NEXT: s_clause 0x1
1760 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
1761 ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:16
1762 ; CHECK-NEXT: s_waitcnt vmcnt(1)
1763 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
1764 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1765 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off offset:16
1766 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1768 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false)
; 16-byte memcpy from addrspace(4) to addrspace(1), both pointers align 2.
; Expected output: one global_load_dwordx4 followed by one
; global_store_dwordx4.
1772 define void @memcpy_p1_p4_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
1773 ; CHECK-LABEL: memcpy_p1_p4_sz16_align_2_2:
1774 ; CHECK: ; %bb.0: ; %entry
1775 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1776 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
1777 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1778 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
1779 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1781 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false)
; 31-byte memcpy from addrspace(4) to addrspace(1), both pointers align 2.
; Expected output: dwordx2 at offset 0 and dwordx4 at offset 8 are loaded in
; one clause and stored; the trailing dwordx2 at offset 23 is loaded and
; stored only after those two stores.
1785 define void @memcpy_p1_p4_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
1786 ; CHECK-LABEL: memcpy_p1_p4_sz31_align_2_2:
1787 ; CHECK: ; %bb.0: ; %entry
1788 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1789 ; CHECK-NEXT: s_clause 0x1
1790 ; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off
1791 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:8
1792 ; CHECK-NEXT: s_waitcnt vmcnt(1)
1793 ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off
1794 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1795 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:8
1796 ; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:23
1797 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1798 ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:23
1799 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1801 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false)
; 32-byte memcpy from addrspace(4) to addrspace(1), both pointers align 2.
; Expected output: two global_load_dwordx4 in one clause (offsets 0 and 16),
; then two global_store_dwordx4 at the same offsets.
1805 define void @memcpy_p1_p4_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
1806 ; CHECK-LABEL: memcpy_p1_p4_sz32_align_2_2:
1807 ; CHECK: ; %bb.0: ; %entry
1808 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1809 ; CHECK-NEXT: s_clause 0x1
1810 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
1811 ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:16
1812 ; CHECK-NEXT: s_waitcnt vmcnt(1)
1813 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
1814 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1815 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off offset:16
1816 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1818 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false)
; 16-byte memcpy from addrspace(4) to addrspace(1), both pointers align 8.
; Expected output: one global_load_dwordx4 followed by one
; global_store_dwordx4.
1822 define void @memcpy_p1_p4_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
1823 ; CHECK-LABEL: memcpy_p1_p4_sz16_align_8_8:
1824 ; CHECK: ; %bb.0: ; %entry
1825 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1826 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
1827 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1828 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
1829 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1831 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false)
; 31-byte memcpy from addrspace(4) to addrspace(1), both pointers align 8.
; Expected output: two dwordx4 load/store pairs at offsets 0 and 15. The
; second load appears only after the first store, with no s_clause grouping.
1835 define void @memcpy_p1_p4_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
1836 ; CHECK-LABEL: memcpy_p1_p4_sz31_align_8_8:
1837 ; CHECK: ; %bb.0: ; %entry
1838 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1839 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
1840 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1841 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
1842 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:15
1843 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1844 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:15
1845 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1847 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false)
; 32-byte memcpy from addrspace(4) to addrspace(1), both pointers align 8.
; Expected output: two dwordx4 load/store pairs at offsets 0 and 16. The
; second load appears only after the first store, with no s_clause grouping.
1851 define void @memcpy_p1_p4_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
1852 ; CHECK-LABEL: memcpy_p1_p4_sz32_align_8_8:
1853 ; CHECK: ; %bb.0: ; %entry
1854 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1855 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
1856 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1857 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
1858 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:16
1859 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1860 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:16
1861 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1863 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false)
; 16-byte memcpy from addrspace(4) to addrspace(1), both pointers align 16.
; Expected output: one global_load_dwordx4 followed by one
; global_store_dwordx4.
1867 define void @memcpy_p1_p4_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
1868 ; CHECK-LABEL: memcpy_p1_p4_sz16_align_16_16:
1869 ; CHECK: ; %bb.0: ; %entry
1870 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1871 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
1872 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1873 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
1874 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1876 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false)
; 31-byte memcpy from addrspace(4) to addrspace(1), both pointers align 16.
; Expected output: two dwordx4 load/store pairs at offsets 0 and 15. The
; second load appears only after the first store, with no s_clause grouping.
1880 define void @memcpy_p1_p4_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
1881 ; CHECK-LABEL: memcpy_p1_p4_sz31_align_16_16:
1882 ; CHECK: ; %bb.0: ; %entry
1883 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1884 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
1885 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1886 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
1887 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:15
1888 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1889 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:15
1890 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1892 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false)
; 32-byte memcpy from addrspace(4) to addrspace(1), both pointers align 16.
; Expected output: two dwordx4 load/store pairs at offsets 0 and 16. The
; second load appears only after the first store, with no s_clause grouping.
1896 define void @memcpy_p1_p4_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
1897 ; CHECK-LABEL: memcpy_p1_p4_sz32_align_16_16:
1898 ; CHECK: ; %bb.0: ; %entry
1899 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1900 ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
1901 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1902 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
1903 ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:16
1904 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1905 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:16
1906 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1908 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false)
; 16-byte memcpy from addrspace(5) to addrspace(1), both pointers align 1.
; Expected output: four buffer_load_dword (offen, offsets 0/4/8/12) in one
; clause, combined into a single global_store_dwordx4.
1912 define void @memcpy_p1_p5_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
1913 ; CHECK-LABEL: memcpy_p1_p5_sz16_align_1_1:
1914 ; CHECK: ; %bb.0: ; %entry
1915 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1916 ; CHECK-NEXT: s_clause 0x3
1917 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
1918 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1919 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1920 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1921 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1922 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
1923 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1925 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false)
; 31-byte memcpy from addrspace(5) to addrspace(1), both pointers align 1.
; Expected output: eight buffer_load_dword (offen) at offsets 0, 4, 8, 12,
; 16, 20, 23 and 27, stored as dwordx4 at offset 0 and dwordx2 at offsets 16
; and 23 (the last two stores overlap by one byte).
1929 define void @memcpy_p1_p5_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
1930 ; CHECK-LABEL: memcpy_p1_p5_sz31_align_1_1:
1931 ; CHECK: ; %bb.0: ; %entry
1932 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1933 ; CHECK-NEXT: s_clause 0x7
1934 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
1935 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1936 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1937 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1938 ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
1939 ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
1940 ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
1941 ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
1942 ; CHECK-NEXT: s_waitcnt vmcnt(4)
1943 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
1944 ; CHECK-NEXT: s_waitcnt vmcnt(2)
1945 ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off offset:16
1946 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1947 ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off offset:23
1948 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1950 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false)
; 32-byte memcpy from addrspace(5) to addrspace(1), both pointers align 1.
; Expected output: eight buffer_load_dword (offen) at offsets 0 through 28 in
; steps of 4, stored as two global_store_dwordx4 at offsets 0 and 16.
1954 define void @memcpy_p1_p5_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
1955 ; CHECK-LABEL: memcpy_p1_p5_sz32_align_1_1:
1956 ; CHECK: ; %bb.0: ; %entry
1957 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1958 ; CHECK-NEXT: s_clause 0x7
1959 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
1960 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1961 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1962 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1963 ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
1964 ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
1965 ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
1966 ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
1967 ; CHECK-NEXT: s_waitcnt vmcnt(4)
1968 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
1969 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1970 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
1971 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1973 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false)
; 16-byte memcpy from addrspace(5) to addrspace(1), both pointers align 2.
; Expected output: four buffer_load_dword (offen, offsets 0/4/8/12) in one
; clause, combined into a single global_store_dwordx4.
1977 define void @memcpy_p1_p5_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
1978 ; CHECK-LABEL: memcpy_p1_p5_sz16_align_2_2:
1979 ; CHECK: ; %bb.0: ; %entry
1980 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1981 ; CHECK-NEXT: s_clause 0x3
1982 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
1983 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1984 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1985 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1986 ; CHECK-NEXT: s_waitcnt vmcnt(0)
1987 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
1988 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1990 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false)
; memcpy of 31 bytes from addrspace(5) to addrspace(1); both pointers are align 2.
; Expected gfx1030 code: dword loads at offsets 0..20 plus 23 and 27; stores are a
; dwordx4 at 0, a dwordx2 at 16 and a dwordx2 at 23. The access at offset 23
; overlaps the previous one by one byte so that exactly bytes 0..30 are covered.
1994 define void @memcpy_p1_p5_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
1995 ; CHECK-LABEL: memcpy_p1_p5_sz31_align_2_2:
1996 ; CHECK: ; %bb.0: ; %entry
1997 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1998 ; CHECK-NEXT: s_clause 0x7
1999 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
2000 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2001 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2002 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2003 ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
2004 ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
2005 ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
2006 ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
2007 ; CHECK-NEXT: s_waitcnt vmcnt(4)
2008 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
2009 ; CHECK-NEXT: s_waitcnt vmcnt(2)
2010 ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off offset:16
2011 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2012 ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off offset:23
2013 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2015 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false)
; memcpy of 32 bytes from addrspace(5) to addrspace(1); both pointers are align 2.
; Expected gfx1030 code: eight buffer_load_dword reads at offsets 0..28, then two
; global_store_dwordx4 writes at offsets 0 and 16.
2019 define void @memcpy_p1_p5_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
2020 ; CHECK-LABEL: memcpy_p1_p5_sz32_align_2_2:
2021 ; CHECK: ; %bb.0: ; %entry
2022 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2023 ; CHECK-NEXT: s_clause 0x7
2024 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
2025 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2026 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2027 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2028 ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
2029 ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
2030 ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
2031 ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
2032 ; CHECK-NEXT: s_waitcnt vmcnt(4)
2033 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
2034 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2035 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
2036 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2038 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false)
; memcpy of 16 bytes from addrspace(5) to addrspace(1); both pointers are align 8.
; Expected gfx1030 code: four buffer_load_dword reads at offsets 0..12, then one
; global_store_dwordx4 write.
2042 define void @memcpy_p1_p5_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
2043 ; CHECK-LABEL: memcpy_p1_p5_sz16_align_8_8:
2044 ; CHECK: ; %bb.0: ; %entry
2045 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2046 ; CHECK-NEXT: s_clause 0x3
2047 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
2048 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2049 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2050 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2051 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2052 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
2053 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2055 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false)
; memcpy of 31 bytes from addrspace(5) to addrspace(1); both pointers are align 8.
; Expected gfx1030 code: dword loads at offsets 0..12 and 15..27; stores are two
; dwordx4 writes at offsets 0 and 15. The second 16-byte group starts at offset 15,
; overlapping the first by one byte so that exactly bytes 0..30 are covered.
2059 define void @memcpy_p1_p5_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
2060 ; CHECK-LABEL: memcpy_p1_p5_sz31_align_8_8:
2061 ; CHECK: ; %bb.0: ; %entry
2062 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2063 ; CHECK-NEXT: s_clause 0x7
2064 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
2065 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2066 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2067 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2068 ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:15
2069 ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:19
2070 ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
2071 ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
2072 ; CHECK-NEXT: s_waitcnt vmcnt(4)
2073 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
2074 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2075 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:15
2076 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2078 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false)
; memcpy of 32 bytes from addrspace(5) to addrspace(1); both pointers are align 8.
; Expected gfx1030 code: eight buffer_load_dword reads at offsets 0..28, then two
; global_store_dwordx4 writes at offsets 0 and 16.
2082 define void @memcpy_p1_p5_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
2083 ; CHECK-LABEL: memcpy_p1_p5_sz32_align_8_8:
2084 ; CHECK: ; %bb.0: ; %entry
2085 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2086 ; CHECK-NEXT: s_clause 0x7
2087 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
2088 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2089 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2090 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2091 ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
2092 ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
2093 ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
2094 ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
2095 ; CHECK-NEXT: s_waitcnt vmcnt(4)
2096 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
2097 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2098 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
2099 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2101 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false)
; memcpy of 16 bytes from addrspace(5) to addrspace(1); both pointers are align 16.
; Expected gfx1030 code: four buffer_load_dword reads at offsets 0..12, then one
; global_store_dwordx4 write.
2105 define void @memcpy_p1_p5_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
2106 ; CHECK-LABEL: memcpy_p1_p5_sz16_align_16_16:
2107 ; CHECK: ; %bb.0: ; %entry
2108 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2109 ; CHECK-NEXT: s_clause 0x3
2110 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
2111 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2112 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2113 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2114 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2115 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
2116 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2118 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false)
; memcpy of 31 bytes from addrspace(5) to addrspace(1); both pointers are align 16.
; Expected gfx1030 code: dword loads at offsets 0..12 and 15..27; stores are two
; dwordx4 writes at offsets 0 and 15. The second 16-byte group starts at offset 15,
; overlapping the first by one byte so that exactly bytes 0..30 are covered.
2122 define void @memcpy_p1_p5_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
2123 ; CHECK-LABEL: memcpy_p1_p5_sz31_align_16_16:
2124 ; CHECK: ; %bb.0: ; %entry
2125 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2126 ; CHECK-NEXT: s_clause 0x7
2127 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
2128 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2129 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2130 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2131 ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:15
2132 ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:19
2133 ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
2134 ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
2135 ; CHECK-NEXT: s_waitcnt vmcnt(4)
2136 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
2137 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2138 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:15
2139 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2141 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false)
; memcpy of 32 bytes from addrspace(5) to addrspace(1); both pointers are align 16.
; Expected gfx1030 code: eight buffer_load_dword reads at offsets 0..28, then two
; global_store_dwordx4 writes at offsets 0 and 16.
2145 define void @memcpy_p1_p5_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
2146 ; CHECK-LABEL: memcpy_p1_p5_sz32_align_16_16:
2147 ; CHECK: ; %bb.0: ; %entry
2148 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2149 ; CHECK-NEXT: s_clause 0x7
2150 ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
2151 ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2152 ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2153 ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2154 ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
2155 ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
2156 ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
2157 ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
2158 ; CHECK-NEXT: s_waitcnt vmcnt(4)
2159 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
2160 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2161 ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
2162 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2164 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false)
; memcpy of 16 bytes from addrspace(0) to addrspace(3); both pointers are align 1.
; Expected gfx1030 code: one flat_load_dwordx4 followed by one ds_write2_b64.
2168 define void @memcpy_p3_p0_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
2169 ; CHECK-LABEL: memcpy_p3_p0_sz16_align_1_1:
2170 ; CHECK: ; %bb.0: ; %entry
2171 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2172 ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
2173 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2174 ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2175 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2176 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2178 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false)
; memcpy of 31 bytes from addrspace(0) to addrspace(3); both pointers are align 1.
; Expected gfx1030 code: flat loads of 8 bytes at offset 23, 8 bytes at offset 16 and
; 16 bytes at offset 0, each followed by a matching ds write. The 8-byte pieces at
; offsets 16 and 23 overlap by one byte so that exactly bytes 0..30 are covered.
2182 define void @memcpy_p3_p0_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
2183 ; CHECK-LABEL: memcpy_p3_p0_sz31_align_1_1:
2184 ; CHECK: ; %bb.0: ; %entry
2185 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2186 ; CHECK-NEXT: s_clause 0x2
2187 ; CHECK-NEXT: flat_load_dwordx2 v[5:6], v[1:2] offset:23
2188 ; CHECK-NEXT: flat_load_dwordx2 v[7:8], v[1:2] offset:16
2189 ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
2190 ; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
2191 ; CHECK-NEXT: ds_write_b64 v0, v[5:6] offset:23
2192 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(2)
2193 ; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
2194 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(2)
2195 ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2196 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2197 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2199 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false)
; memcpy of 32 bytes from addrspace(0) to addrspace(3); both pointers are align 1.
; Expected gfx1030 code: two flat_load_dwordx4 reads (offset 16 first, then offset 0),
; each followed by a ds_write2_b64 of the loaded registers.
2203 define void @memcpy_p3_p0_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
2204 ; CHECK-LABEL: memcpy_p3_p0_sz32_align_1_1:
2205 ; CHECK: ; %bb.0: ; %entry
2206 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2207 ; CHECK-NEXT: s_clause 0x1
2208 ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
2209 ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
2210 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
2211 ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset0:2 offset1:3
2212 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
2213 ; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset1:1
2214 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2215 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2217 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false)
; memcpy of 16 bytes from addrspace(0) to addrspace(3); both pointers are align 2.
; Expected gfx1030 code: one flat_load_dwordx4 followed by one ds_write2_b64.
2221 define void @memcpy_p3_p0_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
2222 ; CHECK-LABEL: memcpy_p3_p0_sz16_align_2_2:
2223 ; CHECK: ; %bb.0: ; %entry
2224 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2225 ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
2226 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2227 ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2228 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2229 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2231 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false)
; memcpy of 31 bytes from addrspace(0) to addrspace(3); both pointers are align 2.
; Expected gfx1030 code: flat loads of 8 bytes at offset 23, 8 bytes at offset 16 and
; 16 bytes at offset 0, each followed by a matching ds write. The 8-byte pieces at
; offsets 16 and 23 overlap by one byte so that exactly bytes 0..30 are covered.
2235 define void @memcpy_p3_p0_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
2236 ; CHECK-LABEL: memcpy_p3_p0_sz31_align_2_2:
2237 ; CHECK: ; %bb.0: ; %entry
2238 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2239 ; CHECK-NEXT: s_clause 0x2
2240 ; CHECK-NEXT: flat_load_dwordx2 v[5:6], v[1:2] offset:23
2241 ; CHECK-NEXT: flat_load_dwordx2 v[7:8], v[1:2] offset:16
2242 ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
2243 ; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
2244 ; CHECK-NEXT: ds_write_b64 v0, v[5:6] offset:23
2245 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(2)
2246 ; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
2247 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(2)
2248 ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2249 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2250 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2252 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false)
; memcpy of 32 bytes from addrspace(0) to addrspace(3); both pointers are align 2.
; Expected gfx1030 code: two flat_load_dwordx4 reads (offset 16 first, then offset 0),
; each followed by a ds_write2_b64 of the loaded registers.
2256 define void @memcpy_p3_p0_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
2257 ; CHECK-LABEL: memcpy_p3_p0_sz32_align_2_2:
2258 ; CHECK: ; %bb.0: ; %entry
2259 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2260 ; CHECK-NEXT: s_clause 0x1
2261 ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
2262 ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
2263 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
2264 ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset0:2 offset1:3
2265 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
2266 ; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset1:1
2267 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2268 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2270 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false)
; memcpy of 16 bytes from addrspace(0) to addrspace(3); both pointers are align 8.
; Expected gfx1030 code: one flat_load_dwordx4 followed by one ds_write2_b64.
2274 define void @memcpy_p3_p0_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
2275 ; CHECK-LABEL: memcpy_p3_p0_sz16_align_8_8:
2276 ; CHECK: ; %bb.0: ; %entry
2277 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2278 ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
2279 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2280 ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2281 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2282 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2284 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false)
; memcpy of 31 bytes from addrspace(0) to addrspace(3); both pointers are align 8.
; Expected gfx1030 code: two flat_load_dwordx4 reads at offsets 0 and 15, stored with
; a ds_write2_b64 at offset 0 and a ds_write_b128 at offset 15. The two 16-byte
; pieces overlap by one byte so that exactly bytes 0..30 are covered.
2288 define void @memcpy_p3_p0_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
2289 ; CHECK-LABEL: memcpy_p3_p0_sz31_align_8_8:
2290 ; CHECK: ; %bb.0: ; %entry
2291 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2292 ; CHECK-NEXT: s_clause 0x1
2293 ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2]
2294 ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] offset:15
2295 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
2296 ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2297 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
2298 ; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15
2299 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2300 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2302 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false)
; memcpy of 32 bytes from addrspace(0) to addrspace(3); both pointers are align 8.
; Expected gfx1030 code: two flat_load_dwordx4 reads (offset 16 first, then offset 0),
; each followed by a ds_write2_b64 of the loaded registers.
2306 define void @memcpy_p3_p0_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
2307 ; CHECK-LABEL: memcpy_p3_p0_sz32_align_8_8:
2308 ; CHECK: ; %bb.0: ; %entry
2309 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2310 ; CHECK-NEXT: s_clause 0x1
2311 ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
2312 ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
2313 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
2314 ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset0:2 offset1:3
2315 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
2316 ; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset1:1
2317 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2318 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2320 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false)
; memcpy of 16 bytes from addrspace(0) to addrspace(3); both pointers are align 16.
; Expected gfx1030 code: one flat_load_dwordx4 followed by one ds_write_b128.
2324 define void @memcpy_p3_p0_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
2325 ; CHECK-LABEL: memcpy_p3_p0_sz16_align_16_16:
2326 ; CHECK: ; %bb.0: ; %entry
2327 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2328 ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
2329 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2330 ; CHECK-NEXT: ds_write_b128 v0, v[1:4]
2331 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2332 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2334 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false)
; memcpy of 31 bytes from addrspace(0) to addrspace(3); both pointers are align 16.
; Expected gfx1030 code: two flat_load_dwordx4 reads (offset 15 first, then offset 0),
; each stored with a ds_write_b128 at the same offset. The two 16-byte pieces
; overlap by one byte so that exactly bytes 0..30 are covered.
2338 define void @memcpy_p3_p0_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
2339 ; CHECK-LABEL: memcpy_p3_p0_sz31_align_16_16:
2340 ; CHECK: ; %bb.0: ; %entry
2341 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2342 ; CHECK-NEXT: s_clause 0x1
2343 ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:15
2344 ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
2345 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
2346 ; CHECK-NEXT: ds_write_b128 v0, v[3:6] offset:15
2347 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
2348 ; CHECK-NEXT: ds_write_b128 v0, v[7:10]
2349 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2350 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2352 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false)
; memcpy of 32 bytes from addrspace(0) to addrspace(3); both pointers are align 16.
; Expected gfx1030 code: two flat_load_dwordx4 reads (offset 16 first, then offset 0),
; each stored with a ds_write_b128 at the same offset.
2356 define void @memcpy_p3_p0_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
2357 ; CHECK-LABEL: memcpy_p3_p0_sz32_align_16_16:
2358 ; CHECK: ; %bb.0: ; %entry
2359 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2360 ; CHECK-NEXT: s_clause 0x1
2361 ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
2362 ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
2363 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
2364 ; CHECK-NEXT: ds_write_b128 v0, v[3:6] offset:16
2365 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
2366 ; CHECK-NEXT: ds_write_b128 v0, v[7:10]
2367 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2368 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2370 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false)
; memcpy of 16 bytes from addrspace(1) to addrspace(3); both pointers are align 1.
; Expected gfx1030 code: one global_load_dwordx4 followed by one ds_write2_b64.
2374 define void @memcpy_p3_p1_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
2375 ; CHECK-LABEL: memcpy_p3_p1_sz16_align_1_1:
2376 ; CHECK: ; %bb.0: ; %entry
2377 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2378 ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
2379 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2380 ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2381 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2382 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2384 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false)
; memcpy of 31 bytes from addrspace(1) to addrspace(3); both pointers are align 1.
; Expected gfx1030 code: global loads of 16 bytes at offset 0, 8 bytes at offset 16
; and 8 bytes at offset 23, each followed by a matching ds write. The 8-byte pieces
; at offsets 16 and 23 overlap by one byte so that exactly bytes 0..30 are covered.
2388 define void @memcpy_p3_p1_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
2389 ; CHECK-LABEL: memcpy_p3_p1_sz31_align_1_1:
2390 ; CHECK: ; %bb.0: ; %entry
2391 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2392 ; CHECK-NEXT: s_clause 0x2
2393 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
2394 ; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16
2395 ; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23
2396 ; CHECK-NEXT: s_waitcnt vmcnt(2)
2397 ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2398 ; CHECK-NEXT: s_waitcnt vmcnt(1)
2399 ; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
2400 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2401 ; CHECK-NEXT: ds_write_b64 v0, v[1:2] offset:23
2402 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2403 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2405 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false)
; memcpy of 32 bytes from addrspace(1) to addrspace(3); both pointers are align 1.
; Expected gfx1030 code: two global_load_dwordx4 reads at offsets 0 and 16, each
; followed by a ds_write2_b64 of the loaded registers.
2409 define void @memcpy_p3_p1_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
2410 ; CHECK-LABEL: memcpy_p3_p1_sz32_align_1_1:
2411 ; CHECK: ; %bb.0: ; %entry
2412 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2413 ; CHECK-NEXT: s_clause 0x1
2414 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
2415 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
2416 ; CHECK-NEXT: s_waitcnt vmcnt(1)
2417 ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2418 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2419 ; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
2420 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2421 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2423 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false)
; memcpy of 16 bytes from addrspace(1) to addrspace(3); both pointers are align 2.
; Expected gfx1030 code: one global_load_dwordx4 followed by one ds_write2_b64.
2427 define void @memcpy_p3_p1_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
2428 ; CHECK-LABEL: memcpy_p3_p1_sz16_align_2_2:
2429 ; CHECK: ; %bb.0: ; %entry
2430 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2431 ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
2432 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2433 ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2434 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2435 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2437 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false)
; memcpy of 31 bytes from addrspace(1) to addrspace(3); both pointers are align 2.
; Expected gfx1030 code: global loads of 16 bytes at offset 0, 8 bytes at offset 16
; and 8 bytes at offset 23, each followed by a matching ds write. The 8-byte pieces
; at offsets 16 and 23 overlap by one byte so that exactly bytes 0..30 are covered.
2441 define void @memcpy_p3_p1_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
2442 ; CHECK-LABEL: memcpy_p3_p1_sz31_align_2_2:
2443 ; CHECK: ; %bb.0: ; %entry
2444 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2445 ; CHECK-NEXT: s_clause 0x2
2446 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
2447 ; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16
2448 ; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23
2449 ; CHECK-NEXT: s_waitcnt vmcnt(2)
2450 ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2451 ; CHECK-NEXT: s_waitcnt vmcnt(1)
2452 ; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
2453 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2454 ; CHECK-NEXT: ds_write_b64 v0, v[1:2] offset:23
2455 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2456 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2458 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false)
; memcpy of 32 bytes from addrspace(1) to addrspace(3); both pointers are align 2.
; Expected gfx1030 code: two global_load_dwordx4 reads at offsets 0 and 16, each
; followed by a ds_write2_b64 of the loaded registers.
2462 define void @memcpy_p3_p1_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
2463 ; CHECK-LABEL: memcpy_p3_p1_sz32_align_2_2:
2464 ; CHECK: ; %bb.0: ; %entry
2465 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2466 ; CHECK-NEXT: s_clause 0x1
2467 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
2468 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
2469 ; CHECK-NEXT: s_waitcnt vmcnt(1)
2470 ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2471 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2472 ; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
2473 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2474 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2476 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false)
; memcpy of 16 bytes from addrspace(1) to addrspace(3); both pointers are align 8.
; Expected gfx1030 code: one global_load_dwordx4 followed by one ds_write2_b64.
2480 define void @memcpy_p3_p1_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
2481 ; CHECK-LABEL: memcpy_p3_p1_sz16_align_8_8:
2482 ; CHECK: ; %bb.0: ; %entry
2483 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2484 ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
2485 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2486 ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2487 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2488 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2490 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false)
; memcpy of 31 bytes from addrspace(1) to addrspace(3); both pointers are align 8.
; Expected gfx1030 code: two global_load_dwordx4 reads at offsets 0 and 15, stored
; with a ds_write2_b64 at offset 0 and a ds_write_b128 at offset 15. The two
; 16-byte pieces overlap by one byte so that exactly bytes 0..30 are covered.
2494 define void @memcpy_p3_p1_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
2495 ; CHECK-LABEL: memcpy_p3_p1_sz31_align_8_8:
2496 ; CHECK: ; %bb.0: ; %entry
2497 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2498 ; CHECK-NEXT: s_clause 0x1
2499 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
2500 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
2501 ; CHECK-NEXT: s_waitcnt vmcnt(1)
2502 ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2503 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2504 ; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15
2505 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2506 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2508 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false)
; memcpy of 32 bytes from addrspace(1) to addrspace(3); both pointers are align 8.
; Expected gfx1030 code: two global_load_dwordx4 reads at offsets 0 and 16, each
; followed by a ds_write2_b64 of the loaded registers.
2512 define void @memcpy_p3_p1_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
2513 ; CHECK-LABEL: memcpy_p3_p1_sz32_align_8_8:
2514 ; CHECK: ; %bb.0: ; %entry
2515 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2516 ; CHECK-NEXT: s_clause 0x1
2517 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
2518 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
2519 ; CHECK-NEXT: s_waitcnt vmcnt(1)
2520 ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2521 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2522 ; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
2523 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2524 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2526 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false)
; memcpy of 16 bytes from addrspace(1) to addrspace(3); both pointers are align 16.
; Expected gfx1030 code: one global_load_dwordx4 followed by one ds_write_b128.
2530 define void @memcpy_p3_p1_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
2531 ; CHECK-LABEL: memcpy_p3_p1_sz16_align_16_16:
2532 ; CHECK: ; %bb.0: ; %entry
2533 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2534 ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
2535 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2536 ; CHECK-NEXT: ds_write_b128 v0, v[1:4]
2537 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2538 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2540 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false)
; memcpy of 31 bytes from addrspace(1) to addrspace(3); both pointers are align 16.
; Expected gfx1030 code: two global_load_dwordx4 reads at offsets 0 and 15, each
; stored with a ds_write_b128 at the same offset. The two 16-byte pieces overlap
; by one byte so that exactly bytes 0..30 are covered.
2544 define void @memcpy_p3_p1_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
2545 ; CHECK-LABEL: memcpy_p3_p1_sz31_align_16_16:
2546 ; CHECK: ; %bb.0: ; %entry
2547 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2548 ; CHECK-NEXT: s_clause 0x1
2549 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
2550 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
2551 ; CHECK-NEXT: s_waitcnt vmcnt(1)
2552 ; CHECK-NEXT: ds_write_b128 v0, v[3:6]
2553 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2554 ; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15
2555 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2556 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2558 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false)
; memcpy of 32 bytes from addrspace(1) to addrspace(3); both pointers are align 16.
; Expected gfx1030 code: two global_load_dwordx4 reads at offsets 0 and 16, each
; stored with a ds_write_b128 at the same offset.
2562 define void @memcpy_p3_p1_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
2563 ; CHECK-LABEL: memcpy_p3_p1_sz32_align_16_16:
2564 ; CHECK: ; %bb.0: ; %entry
2565 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2566 ; CHECK-NEXT: s_clause 0x1
2567 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
2568 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
2569 ; CHECK-NEXT: s_waitcnt vmcnt(1)
2570 ; CHECK-NEXT: ds_write_b128 v0, v[3:6]
2571 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2572 ; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:16
2573 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2574 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2576 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false)
; memcpy of 16 bytes from addrspace(3) to addrspace(3); both pointers are align 1.
; Expected gfx1030 code: one ds_read2_b64 followed by one ds_write2_b64.
2580 define void @memcpy_p3_p3_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
2581 ; CHECK-LABEL: memcpy_p3_p3_sz16_align_1_1:
2582 ; CHECK: ; %bb.0: ; %entry
2583 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2584 ; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
2585 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2586 ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2587 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2588 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2590 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false)
; memcpy of 31 bytes from addrspace(3) to addrspace(3); both pointers are align 1.
; Expected gfx1030 code: ds reads of 8 bytes at offset 23, 8 bytes at offset 16 and
; 16 bytes at offset 0, then the matching ds writes in the same order. The 8-byte
; pieces at offsets 16 and 23 overlap by one byte so that exactly bytes 0..30 are
; covered.
2594 define void @memcpy_p3_p3_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
2595 ; CHECK-LABEL: memcpy_p3_p3_sz31_align_1_1:
2596 ; CHECK: ; %bb.0: ; %entry
2597 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2598 ; CHECK-NEXT: ds_read_b64 v[5:6], v1 offset:23
2599 ; CHECK-NEXT: ds_read_b64 v[7:8], v1 offset:16
2600 ; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
2601 ; CHECK-NEXT: s_waitcnt lgkmcnt(2)
2602 ; CHECK-NEXT: ds_write_b64 v0, v[5:6] offset:23
2603 ; CHECK-NEXT: s_waitcnt lgkmcnt(2)
2604 ; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
2605 ; CHECK-NEXT: s_waitcnt lgkmcnt(2)
2606 ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2607 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2608 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2610 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false)
; memcpy of 32 bytes from addrspace(3) to addrspace(3); both pointers are align 1.
; Expected gfx1030 code: two ds_read2_b64 reads (offset0:2 offset1:3 first, then
; offset1:1), followed by two ds_write2_b64 writes with the same offsets.
2614 define void @memcpy_p3_p3_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
2615 ; CHECK-LABEL: memcpy_p3_p3_sz32_align_1_1:
2616 ; CHECK: ; %bb.0: ; %entry
2617 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2618 ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset0:2 offset1:3
2619 ; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset1:1
2620 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
2621 ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3
2622 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
2623 ; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset1:1
2624 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2625 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2627 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false)
; memcpy of 16 bytes from addrspace(3) to addrspace(3); both pointers are align 2.
; Expected gfx1030 code: one ds_read2_b64 followed by one ds_write2_b64.
2631 define void @memcpy_p3_p3_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
2632 ; CHECK-LABEL: memcpy_p3_p3_sz16_align_2_2:
2633 ; CHECK: ; %bb.0: ; %entry
2634 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2635 ; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
2636 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2637 ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2638 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2639 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2641 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false)
; 31-byte memcpy, addrspace(3) -> addrspace(3), both pointers align 2.
; Expected lowering: ds_read_b64 at byte offsets 23 and 16 plus one
; ds_read2_b64, mirrored by ds_write_b64 / ds_write2_b64 stores. The 64-bit
; access at offset 23 overlaps the one at offset 16; no byte- or short-sized
; accesses appear in the expected output for the odd size.
2645 define void @memcpy_p3_p3_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
2646 ; CHECK-LABEL: memcpy_p3_p3_sz31_align_2_2:
2647 ; CHECK: ; %bb.0: ; %entry
2648 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2649 ; CHECK-NEXT: ds_read_b64 v[5:6], v1 offset:23
2650 ; CHECK-NEXT: ds_read_b64 v[7:8], v1 offset:16
2651 ; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
2652 ; CHECK-NEXT: s_waitcnt lgkmcnt(2)
2653 ; CHECK-NEXT: ds_write_b64 v0, v[5:6] offset:23
2654 ; CHECK-NEXT: s_waitcnt lgkmcnt(2)
2655 ; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
2656 ; CHECK-NEXT: s_waitcnt lgkmcnt(2)
2657 ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2658 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2659 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2661 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false)
; 32-byte memcpy, addrspace(3) -> addrspace(3), both pointers align 2.
; Expected lowering: two ds_read2_b64 loads, each written back by a
; ds_write2_b64 that uses the same offsets as its load.
2665 define void @memcpy_p3_p3_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
2666 ; CHECK-LABEL: memcpy_p3_p3_sz32_align_2_2:
2667 ; CHECK: ; %bb.0: ; %entry
2668 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2669 ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset0:2 offset1:3
2670 ; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset1:1
2671 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
2672 ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3
2673 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
2674 ; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset1:1
2675 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2676 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2678 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false)
; 16-byte memcpy, addrspace(3) -> addrspace(3), both pointers align 8.
; Expected lowering: a single ds_read2_b64 / ds_write2_b64 pair.
2682 define void @memcpy_p3_p3_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
2683 ; CHECK-LABEL: memcpy_p3_p3_sz16_align_8_8:
2684 ; CHECK: ; %bb.0: ; %entry
2685 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2686 ; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
2687 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2688 ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2689 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2690 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2692 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false)
; 31-byte memcpy, addrspace(3) -> addrspace(3), both pointers align 8.
; Expected lowering: one ds_read2_b64 for the start of the buffer and one
; ds_read_b128 at byte offset 15, mirrored by ds_write2_b64 / ds_write_b128.
; The 128-bit access at offset 15 overlaps the leading 16 bytes by one byte.
2696 define void @memcpy_p3_p3_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
2697 ; CHECK-LABEL: memcpy_p3_p3_sz31_align_8_8:
2698 ; CHECK: ; %bb.0: ; %entry
2699 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2700 ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1
2701 ; CHECK-NEXT: ds_read_b128 v[6:9], v1 offset:15
2702 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
2703 ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
2704 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
2705 ; CHECK-NEXT: ds_write_b128 v0, v[6:9] offset:15
2706 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2707 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2709 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false)
; 32-byte memcpy, addrspace(3) -> addrspace(3), both pointers align 8.
; Expected lowering: two ds_read2_b64 loads, each written back by a
; ds_write2_b64 that uses the same offsets as its load.
2713 define void @memcpy_p3_p3_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
2714 ; CHECK-LABEL: memcpy_p3_p3_sz32_align_8_8:
2715 ; CHECK: ; %bb.0: ; %entry
2716 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2717 ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset0:2 offset1:3
2718 ; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset1:1
2719 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
2720 ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3
2721 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
2722 ; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset1:1
2723 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2724 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2726 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false)
; 16-byte memcpy, addrspace(3) -> addrspace(3), both pointers align 16.
; Expected lowering: a single ds_read_b128 / ds_write_b128 pair (the lower
; alignment variants of this size use ds_read2_b64 / ds_write2_b64 instead).
2730 define void @memcpy_p3_p3_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
2731 ; CHECK-LABEL: memcpy_p3_p3_sz16_align_16_16:
2732 ; CHECK: ; %bb.0: ; %entry
2733 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2734 ; CHECK-NEXT: ds_read_b128 v[1:4], v1
2735 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2736 ; CHECK-NEXT: ds_write_b128 v0, v[1:4]
2737 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2738 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2740 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false)
; 31-byte memcpy, addrspace(3) -> addrspace(3), both pointers align 16.
; Expected lowering: ds_read_b128 at byte offset 15 and at offset 0, each
; written back by a ds_write_b128 at the same offset. The two 128-bit
; accesses overlap by one byte.
2744 define void @memcpy_p3_p3_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
2745 ; CHECK-LABEL: memcpy_p3_p3_sz31_align_16_16:
2746 ; CHECK: ; %bb.0: ; %entry
2747 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2748 ; CHECK-NEXT: ds_read_b128 v[2:5], v1 offset:15
2749 ; CHECK-NEXT: ds_read_b128 v[6:9], v1
2750 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
2751 ; CHECK-NEXT: ds_write_b128 v0, v[2:5] offset:15
2752 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
2753 ; CHECK-NEXT: ds_write_b128 v0, v[6:9]
2754 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2755 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2757 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false)
; 32-byte memcpy, addrspace(3) -> addrspace(3), both pointers align 16.
; Expected lowering: ds_read_b128 at byte offset 16 and at offset 0, each
; written back by a ds_write_b128 at the same offset.
2761 define void @memcpy_p3_p3_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
2762 ; CHECK-LABEL: memcpy_p3_p3_sz32_align_16_16:
2763 ; CHECK: ; %bb.0: ; %entry
2764 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2765 ; CHECK-NEXT: ds_read_b128 v[2:5], v1 offset:16
2766 ; CHECK-NEXT: ds_read_b128 v[6:9], v1
2767 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
2768 ; CHECK-NEXT: ds_write_b128 v0, v[2:5] offset:16
2769 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
2770 ; CHECK-NEXT: ds_write_b128 v0, v[6:9]
2771 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2772 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2774 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false)
; 16-byte memcpy, addrspace(4) source -> addrspace(3) destination, align 1.
; Expected lowering: one global_load_dwordx4 followed by one ds_write2_b64.
2778 define void @memcpy_p3_p4_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
2779 ; CHECK-LABEL: memcpy_p3_p4_sz16_align_1_1:
2780 ; CHECK: ; %bb.0: ; %entry
2781 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2782 ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
2783 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2784 ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2785 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2786 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2788 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
; 31-byte memcpy, addrspace(4) source -> addrspace(3) destination, align 1.
; Expected lowering: a clause of three global loads (dwordx4 at offset 0,
; dwordx2 at offsets 16 and 23) stored with ds_write2_b64 and two
; ds_write_b64. The accesses at offsets 16 and 23 overlap. Each store is
; preceded by a wait with a decreasing vmcnt (2, 1, 0).
2792 define void @memcpy_p3_p4_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
2793 ; CHECK-LABEL: memcpy_p3_p4_sz31_align_1_1:
2794 ; CHECK: ; %bb.0: ; %entry
2795 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2796 ; CHECK-NEXT: s_clause 0x2
2797 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
2798 ; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16
2799 ; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23
2800 ; CHECK-NEXT: s_waitcnt vmcnt(2)
2801 ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2802 ; CHECK-NEXT: s_waitcnt vmcnt(1)
2803 ; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
2804 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2805 ; CHECK-NEXT: ds_write_b64 v0, v[1:2] offset:23
2806 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2807 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2809 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false)
; 32-byte memcpy, addrspace(4) source -> addrspace(3) destination, align 1.
; Expected lowering: a clause of two global_load_dwordx4 (offsets 0 and 16),
; each stored with a ds_write2_b64 once its load has completed.
2813 define void @memcpy_p3_p4_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
2814 ; CHECK-LABEL: memcpy_p3_p4_sz32_align_1_1:
2815 ; CHECK: ; %bb.0: ; %entry
2816 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2817 ; CHECK-NEXT: s_clause 0x1
2818 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
2819 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
2820 ; CHECK-NEXT: s_waitcnt vmcnt(1)
2821 ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2822 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2823 ; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
2824 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2825 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2827 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false)
; 16-byte memcpy, addrspace(4) source -> addrspace(3) destination, align 2.
; Expected lowering: one global_load_dwordx4 followed by one ds_write2_b64.
2831 define void @memcpy_p3_p4_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
2832 ; CHECK-LABEL: memcpy_p3_p4_sz16_align_2_2:
2833 ; CHECK: ; %bb.0: ; %entry
2834 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2835 ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
2836 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2837 ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2838 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2839 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2841 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false)
; 31-byte memcpy, addrspace(4) source -> addrspace(3) destination, align 2.
; Expected lowering: a clause of three global loads (dwordx4 at offset 0,
; dwordx2 at offsets 16 and 23) stored with ds_write2_b64 and two
; ds_write_b64. The accesses at offsets 16 and 23 overlap.
2845 define void @memcpy_p3_p4_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
2846 ; CHECK-LABEL: memcpy_p3_p4_sz31_align_2_2:
2847 ; CHECK: ; %bb.0: ; %entry
2848 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2849 ; CHECK-NEXT: s_clause 0x2
2850 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
2851 ; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16
2852 ; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23
2853 ; CHECK-NEXT: s_waitcnt vmcnt(2)
2854 ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2855 ; CHECK-NEXT: s_waitcnt vmcnt(1)
2856 ; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
2857 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2858 ; CHECK-NEXT: ds_write_b64 v0, v[1:2] offset:23
2859 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2860 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2862 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false)
; 32-byte memcpy, addrspace(4) source -> addrspace(3) destination, align 2.
; Expected lowering: a clause of two global_load_dwordx4 (offsets 0 and 16),
; each stored with a ds_write2_b64 once its load has completed.
2866 define void @memcpy_p3_p4_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
2867 ; CHECK-LABEL: memcpy_p3_p4_sz32_align_2_2:
2868 ; CHECK: ; %bb.0: ; %entry
2869 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2870 ; CHECK-NEXT: s_clause 0x1
2871 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
2872 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
2873 ; CHECK-NEXT: s_waitcnt vmcnt(1)
2874 ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2875 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2876 ; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
2877 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2878 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2880 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false)
; 16-byte memcpy, addrspace(4) source -> addrspace(3) destination, align 8.
; Expected lowering: one global_load_dwordx4 followed by one ds_write2_b64.
2884 define void @memcpy_p3_p4_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
2885 ; CHECK-LABEL: memcpy_p3_p4_sz16_align_8_8:
2886 ; CHECK: ; %bb.0: ; %entry
2887 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2888 ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
2889 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2890 ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2891 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2892 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2894 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false)
; 31-byte memcpy, addrspace(4) source -> addrspace(3) destination, align 8.
; Expected lowering: a clause of two global_load_dwordx4 (offsets 0 and 15)
; stored with ds_write2_b64 and ds_write_b128 at offset 15. The two 16-byte
; accesses overlap by one byte.
2898 define void @memcpy_p3_p4_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
2899 ; CHECK-LABEL: memcpy_p3_p4_sz31_align_8_8:
2900 ; CHECK: ; %bb.0: ; %entry
2901 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2902 ; CHECK-NEXT: s_clause 0x1
2903 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
2904 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
2905 ; CHECK-NEXT: s_waitcnt vmcnt(1)
2906 ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2907 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2908 ; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15
2909 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2910 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2912 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false)
; 32-byte memcpy, addrspace(4) source -> addrspace(3) destination, align 8.
; Expected lowering: a clause of two global_load_dwordx4 (offsets 0 and 16),
; each stored with a ds_write2_b64 once its load has completed.
2916 define void @memcpy_p3_p4_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
2917 ; CHECK-LABEL: memcpy_p3_p4_sz32_align_8_8:
2918 ; CHECK: ; %bb.0: ; %entry
2919 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2920 ; CHECK-NEXT: s_clause 0x1
2921 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
2922 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
2923 ; CHECK-NEXT: s_waitcnt vmcnt(1)
2924 ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2925 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2926 ; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
2927 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2928 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2930 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false)
; 16-byte memcpy, addrspace(4) source -> addrspace(3) destination, align 16.
; Expected lowering: one global_load_dwordx4 followed by one ds_write_b128
; (the lower-alignment variants of this size store with ds_write2_b64).
2934 define void @memcpy_p3_p4_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
2935 ; CHECK-LABEL: memcpy_p3_p4_sz16_align_16_16:
2936 ; CHECK: ; %bb.0: ; %entry
2937 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2938 ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
2939 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2940 ; CHECK-NEXT: ds_write_b128 v0, v[1:4]
2941 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2942 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2944 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false)
; 31-byte memcpy, addrspace(4) source -> addrspace(3) destination, align 16.
; Expected lowering: a clause of two global_load_dwordx4 (offsets 0 and 15),
; each stored with a ds_write_b128 at the matching offset. The two 16-byte
; accesses overlap by one byte.
2948 define void @memcpy_p3_p4_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
2949 ; CHECK-LABEL: memcpy_p3_p4_sz31_align_16_16:
2950 ; CHECK: ; %bb.0: ; %entry
2951 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2952 ; CHECK-NEXT: s_clause 0x1
2953 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
2954 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
2955 ; CHECK-NEXT: s_waitcnt vmcnt(1)
2956 ; CHECK-NEXT: ds_write_b128 v0, v[3:6]
2957 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2958 ; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15
2959 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2960 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2962 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false)
; 32-byte memcpy, addrspace(4) source -> addrspace(3) destination, align 16.
; Expected lowering: a clause of two global_load_dwordx4 (offsets 0 and 16),
; each stored with a ds_write_b128 at the matching offset.
2966 define void @memcpy_p3_p4_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
2967 ; CHECK-LABEL: memcpy_p3_p4_sz32_align_16_16:
2968 ; CHECK: ; %bb.0: ; %entry
2969 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2970 ; CHECK-NEXT: s_clause 0x1
2971 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
2972 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
2973 ; CHECK-NEXT: s_waitcnt vmcnt(1)
2974 ; CHECK-NEXT: ds_write_b128 v0, v[3:6]
2975 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2976 ; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:16
2977 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2978 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2980 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false)
; 16-byte memcpy, addrspace(5) source -> addrspace(3) destination, align 1.
; Expected lowering: a clause of four buffer_load_dword (offsets 0, 4, 8,
; 12) followed by a single ds_write2_b64 once all loads have completed.
2984 define void @memcpy_p3_p5_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
2985 ; CHECK-LABEL: memcpy_p3_p5_sz16_align_1_1:
2986 ; CHECK: ; %bb.0: ; %entry
2987 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2988 ; CHECK-NEXT: s_clause 0x3
2989 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
2990 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
2991 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
2992 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
2993 ; CHECK-NEXT: s_waitcnt vmcnt(0)
2994 ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
2995 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
2996 ; CHECK-NEXT: s_setpc_b64 s[30:31]
2998 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false)
; 31-byte memcpy, addrspace(5) source -> addrspace(3) destination, align 1.
; Expected lowering: a clause of eight buffer_load_dword at offsets 0, 4, 8,
; 12, 16, 20, 23 and 27, stored with one ds_write2_b64 and two ds_write_b64
; (offsets 16 and 23). The dword at offset 23 overlaps the one at offset 20.
3002 define void @memcpy_p3_p5_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
3003 ; CHECK-LABEL: memcpy_p3_p5_sz31_align_1_1:
3004 ; CHECK: ; %bb.0: ; %entry
3005 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3006 ; CHECK-NEXT: s_clause 0x7
3007 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
3008 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3009 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3010 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3011 ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
3012 ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
3013 ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:23
3014 ; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:27
3015 ; CHECK-NEXT: s_waitcnt vmcnt(4)
3016 ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3017 ; CHECK-NEXT: s_waitcnt vmcnt(2)
3018 ; CHECK-NEXT: ds_write_b64 v0, v[6:7] offset:16
3019 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3020 ; CHECK-NEXT: ds_write_b64 v0, v[8:9] offset:23
3021 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3022 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3024 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false)
; 32-byte memcpy, addrspace(5) source -> addrspace(3) destination, align 1.
; Expected lowering: a clause of eight buffer_load_dword at offsets 0..28 in
; steps of 4, stored with two ds_write2_b64 (after vmcnt(4) and vmcnt(0)).
3028 define void @memcpy_p3_p5_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
3029 ; CHECK-LABEL: memcpy_p3_p5_sz32_align_1_1:
3030 ; CHECK: ; %bb.0: ; %entry
3031 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3032 ; CHECK-NEXT: s_clause 0x7
3033 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
3034 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3035 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3036 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3037 ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
3038 ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
3039 ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
3040 ; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:28
3041 ; CHECK-NEXT: s_waitcnt vmcnt(4)
3042 ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3043 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3044 ; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset0:2 offset1:3
3045 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3046 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3048 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false)
; 16-byte memcpy, addrspace(5) source -> addrspace(3) destination, align 2.
; Expected lowering: a clause of four buffer_load_dword (offsets 0, 4, 8,
; 12) followed by a single ds_write2_b64 once all loads have completed.
3052 define void @memcpy_p3_p5_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
3053 ; CHECK-LABEL: memcpy_p3_p5_sz16_align_2_2:
3054 ; CHECK: ; %bb.0: ; %entry
3055 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3056 ; CHECK-NEXT: s_clause 0x3
3057 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
3058 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3059 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3060 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3061 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3062 ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3063 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3064 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3066 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false)
; 31-byte memcpy, addrspace(5) source -> addrspace(3) destination, align 2.
; Expected lowering: a clause of eight buffer_load_dword at offsets 0, 4, 8,
; 12, 16, 20, 23 and 27, stored with one ds_write2_b64 and two ds_write_b64
; (offsets 16 and 23). The dword at offset 23 overlaps the one at offset 20.
3070 define void @memcpy_p3_p5_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
3071 ; CHECK-LABEL: memcpy_p3_p5_sz31_align_2_2:
3072 ; CHECK: ; %bb.0: ; %entry
3073 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3074 ; CHECK-NEXT: s_clause 0x7
3075 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
3076 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3077 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3078 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3079 ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
3080 ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
3081 ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:23
3082 ; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:27
3083 ; CHECK-NEXT: s_waitcnt vmcnt(4)
3084 ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3085 ; CHECK-NEXT: s_waitcnt vmcnt(2)
3086 ; CHECK-NEXT: ds_write_b64 v0, v[6:7] offset:16
3087 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3088 ; CHECK-NEXT: ds_write_b64 v0, v[8:9] offset:23
3089 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3090 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3092 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false)
; 32-byte memcpy, addrspace(5) source -> addrspace(3) destination, align 2.
; Expected lowering: a clause of eight buffer_load_dword at offsets 0..28 in
; steps of 4, stored with two ds_write2_b64 (after vmcnt(4) and vmcnt(0)).
3096 define void @memcpy_p3_p5_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
3097 ; CHECK-LABEL: memcpy_p3_p5_sz32_align_2_2:
3098 ; CHECK: ; %bb.0: ; %entry
3099 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3100 ; CHECK-NEXT: s_clause 0x7
3101 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
3102 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3103 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3104 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3105 ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
3106 ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
3107 ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
3108 ; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:28
3109 ; CHECK-NEXT: s_waitcnt vmcnt(4)
3110 ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3111 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3112 ; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset0:2 offset1:3
3113 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3114 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3116 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false)
; 16-byte memcpy, addrspace(5) source -> addrspace(3) destination, align 8.
; Expected lowering: a clause of four buffer_load_dword (offsets 0, 4, 8,
; 12) followed by a single ds_write2_b64 once all loads have completed.
3120 define void @memcpy_p3_p5_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
3121 ; CHECK-LABEL: memcpy_p3_p5_sz16_align_8_8:
3122 ; CHECK: ; %bb.0: ; %entry
3123 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3124 ; CHECK-NEXT: s_clause 0x3
3125 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
3126 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3127 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3128 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3129 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3130 ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3131 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3132 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3134 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false)
; 31-byte memcpy, addrspace(5) source -> addrspace(3) destination, align 8.
; Expected lowering: a clause of eight buffer_load_dword at offsets 0, 4, 8,
; 12, 15, 19, 23 and 27, stored with one ds_write2_b64 and one ds_write_b128
; at offset 15. The dword at offset 15 overlaps the one at offset 12. Note
; the register assignment: the first four loads land in v6..v9 and the last
; four in v2..v5.
3138 define void @memcpy_p3_p5_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
3139 ; CHECK-LABEL: memcpy_p3_p5_sz31_align_8_8:
3140 ; CHECK: ; %bb.0: ; %entry
3141 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3142 ; CHECK-NEXT: s_clause 0x7
3143 ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen
3144 ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
3145 ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
3146 ; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:12
3147 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:15
3148 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:19
3149 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:23
3150 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:27
3151 ; CHECK-NEXT: s_waitcnt vmcnt(4)
3152 ; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset1:1
3153 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3154 ; CHECK-NEXT: ds_write_b128 v0, v[2:5] offset:15
3155 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3156 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3158 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false)
; 32-byte memcpy, addrspace(5) source -> addrspace(3) destination, align 8.
; Expected lowering: a clause of eight buffer_load_dword at offsets 0..28 in
; steps of 4, stored with two ds_write2_b64 (after vmcnt(4) and vmcnt(0)).
3162 define void @memcpy_p3_p5_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
3163 ; CHECK-LABEL: memcpy_p3_p5_sz32_align_8_8:
3164 ; CHECK: ; %bb.0: ; %entry
3165 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3166 ; CHECK-NEXT: s_clause 0x7
3167 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
3168 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3169 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3170 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3171 ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
3172 ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
3173 ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
3174 ; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:28
3175 ; CHECK-NEXT: s_waitcnt vmcnt(4)
3176 ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3177 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3178 ; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset0:2 offset1:3
3179 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3180 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3182 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false)
; 16-byte memcpy, addrspace(5) source -> addrspace(3) destination, align 16.
; Expected lowering: a clause of four buffer_load_dword (offsets 0, 4, 8,
; 12) followed by a single ds_write_b128 once all loads have completed.
3186 define void @memcpy_p3_p5_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
3187 ; CHECK-LABEL: memcpy_p3_p5_sz16_align_16_16:
3188 ; CHECK: ; %bb.0: ; %entry
3189 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3190 ; CHECK-NEXT: s_clause 0x3
3191 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
3192 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3193 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3194 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3195 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3196 ; CHECK-NEXT: ds_write_b128 v0, v[2:5]
3197 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3198 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3200 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false)
; 31-byte memcpy, addrspace(5) source -> addrspace(3) destination, align 16.
; Expected lowering: a clause of eight buffer_load_dword at offsets 0, 4, 8,
; 12, 15, 19, 23 and 27, stored with two ds_write_b128 (offsets 0 and 15).
; The dword at offset 15 overlaps the one at offset 12.
3204 define void @memcpy_p3_p5_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
3205 ; CHECK-LABEL: memcpy_p3_p5_sz31_align_16_16:
3206 ; CHECK: ; %bb.0: ; %entry
3207 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3208 ; CHECK-NEXT: s_clause 0x7
3209 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
3210 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3211 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3212 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3213 ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:15
3214 ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:19
3215 ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:23
3216 ; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:27
3217 ; CHECK-NEXT: s_waitcnt vmcnt(4)
3218 ; CHECK-NEXT: ds_write_b128 v0, v[2:5]
3219 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3220 ; CHECK-NEXT: ds_write_b128 v0, v[6:9] offset:15
3221 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3222 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3224 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false)
; 32-byte memcpy, addrspace(5) source -> addrspace(3) destination, align 16.
; Expected lowering: a clause of eight buffer_load_dword at offsets 0..28 in
; steps of 4, stored with two ds_write_b128 (offsets 0 and 16).
3228 define void @memcpy_p3_p5_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
3229 ; CHECK-LABEL: memcpy_p3_p5_sz32_align_16_16:
3230 ; CHECK: ; %bb.0: ; %entry
3231 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3232 ; CHECK-NEXT: s_clause 0x7
3233 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
3234 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3235 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3236 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3237 ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
3238 ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
3239 ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
3240 ; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:28
3241 ; CHECK-NEXT: s_waitcnt vmcnt(4)
3242 ; CHECK-NEXT: ds_write_b128 v0, v[2:5]
3243 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3244 ; CHECK-NEXT: ds_write_b128 v0, v[6:9] offset:16
3245 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3246 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3248 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false)
; 16-byte memcpy, addrspace(0) source -> addrspace(5) destination, align 1.
; Expected lowering: one flat_load_dwordx4, then four buffer_store_dword
; emitted from the highest offset (12) down to 0.
3252 define void @memcpy_p5_p0_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
3253 ; CHECK-LABEL: memcpy_p5_p0_sz16_align_1_1:
3254 ; CHECK: ; %bb.0: ; %entry
3255 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3256 ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
3257 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3258 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3259 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3260 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3261 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
3262 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3264 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false)
; 31-byte memcpy, addrspace(0) source -> addrspace(5) destination, align 1.
; Expected lowering: a clause of four flat loads (ubyte at offset 30, ushort
; at 28, dwordx3 at 16, dwordx4 at 0), then buffer stores from the highest
; offset down: byte at 30, short at 28 and seven dwords at 24..0. Unlike the
; addrspace(3)-destination sz31 cases, the tail here uses byte/short
; accesses rather than an overlapping wide access.
3268 define void @memcpy_p5_p0_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
3269 ; CHECK-LABEL: memcpy_p5_p0_sz31_align_1_1:
3270 ; CHECK: ; %bb.0: ; %entry
3271 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3272 ; CHECK-NEXT: s_clause 0x3
3273 ; CHECK-NEXT: flat_load_ubyte v8, v[1:2] offset:30
3274 ; CHECK-NEXT: flat_load_ushort v9, v[1:2] offset:28
3275 ; CHECK-NEXT: flat_load_dwordx3 v[5:7], v[1:2] offset:16
3276 ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
3277 ; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3)
3278 ; CHECK-NEXT: buffer_store_byte v8, v0, s[0:3], 0 offen offset:30
3279 ; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
3280 ; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28
3281 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
3282 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
3283 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
3284 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
3285 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3286 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3287 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3288 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3289 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
3290 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3292 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false)
; 32-byte memcpy from an addrspace(0) source to an addrspace(5) destination, both align 1.
; Expected code: two flat_load_dwordx4 (offsets 16 and 0), then eight buffer_store_dword
; covering offsets 28 down to 0.
3296 define void @memcpy_p5_p0_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
3297 ; CHECK-LABEL: memcpy_p5_p0_sz32_align_1_1:
3298 ; CHECK: ; %bb.0: ; %entry
3299 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3300 ; CHECK-NEXT: s_clause 0x1
3301 ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
3302 ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
3303 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
3304 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:28
3305 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:24
3306 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:20
3307 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:16
3308 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3309 ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
3310 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
3311 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
3312 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen
3313 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3315 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false)
; 16-byte memcpy from an addrspace(0) source to an addrspace(5) destination, both align 2.
; Expected code: one flat_load_dwordx4, then four buffer_store_dword at offsets 12, 8, 4, 0.
3319 define void @memcpy_p5_p0_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
3320 ; CHECK-LABEL: memcpy_p5_p0_sz16_align_2_2:
3321 ; CHECK: ; %bb.0: ; %entry
3322 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3323 ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
3324 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3325 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3326 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3327 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3328 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
3329 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3331 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false)
; 31-byte memcpy from an addrspace(0) source to an addrspace(5) destination, both align 2.
; Expected code: flat loads of a byte (offset 30), a short (offset 28), three dwords
; (offset 16) and four dwords (offset 0), each written back at the same offsets with
; buffer byte/short/dword stores.
3335 define void @memcpy_p5_p0_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
3336 ; CHECK-LABEL: memcpy_p5_p0_sz31_align_2_2:
3337 ; CHECK: ; %bb.0: ; %entry
3338 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3339 ; CHECK-NEXT: s_clause 0x3
3340 ; CHECK-NEXT: flat_load_ubyte v8, v[1:2] offset:30
3341 ; CHECK-NEXT: flat_load_ushort v9, v[1:2] offset:28
3342 ; CHECK-NEXT: flat_load_dwordx3 v[5:7], v[1:2] offset:16
3343 ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
3344 ; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3)
3345 ; CHECK-NEXT: buffer_store_byte v8, v0, s[0:3], 0 offen offset:30
3346 ; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
3347 ; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28
3348 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
3349 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
3350 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
3351 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
3352 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3353 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3354 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3355 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3356 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
3357 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3359 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false)
; 32-byte memcpy from an addrspace(0) source to an addrspace(5) destination, both align 2.
; Expected code: two flat_load_dwordx4 (offsets 16 and 0), then eight buffer_store_dword
; covering offsets 28 down to 0.
3363 define void @memcpy_p5_p0_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
3364 ; CHECK-LABEL: memcpy_p5_p0_sz32_align_2_2:
3365 ; CHECK: ; %bb.0: ; %entry
3366 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3367 ; CHECK-NEXT: s_clause 0x1
3368 ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
3369 ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
3370 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
3371 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:28
3372 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:24
3373 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:20
3374 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:16
3375 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3376 ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
3377 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
3378 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
3379 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen
3380 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3382 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false)
; 16-byte memcpy from an addrspace(0) source to an addrspace(5) destination, both align 8.
; Expected code: one flat_load_dwordx4, then four buffer_store_dword at offsets 12, 8, 4, 0.
3386 define void @memcpy_p5_p0_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
3387 ; CHECK-LABEL: memcpy_p5_p0_sz16_align_8_8:
3388 ; CHECK: ; %bb.0: ; %entry
3389 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3390 ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
3391 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3392 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3393 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3394 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3395 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
3396 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3398 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false)
; 31-byte memcpy from an addrspace(0) source to an addrspace(5) destination, both align 8.
; Expected code: two flat_load_dwordx4 at offsets 15 and 0. The byte ranges [15,31) and
; [0,16) overlap at byte 15, so two 16-byte accesses cover all 31 bytes. The data is
; written with dword stores at offsets 27/23/19/15 and 12/8/4/0.
3402 define void @memcpy_p5_p0_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
3403 ; CHECK-LABEL: memcpy_p5_p0_sz31_align_8_8:
3404 ; CHECK: ; %bb.0: ; %entry
3405 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3406 ; CHECK-NEXT: s_clause 0x1
3407 ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:15
3408 ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
3409 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
3410 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:27
3411 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:23
3412 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:19
3413 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:15
3414 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3415 ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
3416 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
3417 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
3418 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen
3419 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3421 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false)
; 32-byte memcpy from an addrspace(0) source to an addrspace(5) destination, both align 8.
; Expected code: two flat_load_dwordx4 (offsets 16 and 0), then eight buffer_store_dword
; covering offsets 28 down to 0.
3425 define void @memcpy_p5_p0_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
3426 ; CHECK-LABEL: memcpy_p5_p0_sz32_align_8_8:
3427 ; CHECK: ; %bb.0: ; %entry
3428 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3429 ; CHECK-NEXT: s_clause 0x1
3430 ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
3431 ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
3432 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
3433 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:28
3434 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:24
3435 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:20
3436 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:16
3437 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3438 ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
3439 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
3440 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
3441 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen
3442 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3444 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false)
; 16-byte memcpy from an addrspace(0) source to an addrspace(5) destination, both align 16.
; Expected code: one flat_load_dwordx4, then four buffer_store_dword at offsets 12, 8, 4, 0.
3448 define void @memcpy_p5_p0_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
3449 ; CHECK-LABEL: memcpy_p5_p0_sz16_align_16_16:
3450 ; CHECK: ; %bb.0: ; %entry
3451 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3452 ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
3453 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3454 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3455 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3456 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3457 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
3458 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3460 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false)
; 31-byte memcpy from an addrspace(0) source to an addrspace(5) destination, both align 16.
; Expected code: two flat_load_dwordx4 at offsets 15 and 0. The byte ranges [15,31) and
; [0,16) overlap at byte 15, so two 16-byte accesses cover all 31 bytes. The data is
; written with dword stores at offsets 27/23/19/15 and 12/8/4/0.
3464 define void @memcpy_p5_p0_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
3465 ; CHECK-LABEL: memcpy_p5_p0_sz31_align_16_16:
3466 ; CHECK: ; %bb.0: ; %entry
3467 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3468 ; CHECK-NEXT: s_clause 0x1
3469 ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:15
3470 ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
3471 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
3472 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:27
3473 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:23
3474 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:19
3475 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:15
3476 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3477 ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
3478 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
3479 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
3480 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen
3481 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3483 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false)
; 32-byte memcpy from an addrspace(0) source to an addrspace(5) destination, both align 16.
; Expected code: two flat_load_dwordx4 (offsets 16 and 0), then eight buffer_store_dword
; covering offsets 28 down to 0.
3487 define void @memcpy_p5_p0_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
3488 ; CHECK-LABEL: memcpy_p5_p0_sz32_align_16_16:
3489 ; CHECK: ; %bb.0: ; %entry
3490 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3491 ; CHECK-NEXT: s_clause 0x1
3492 ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
3493 ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
3494 ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
3495 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:28
3496 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:24
3497 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:20
3498 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:16
3499 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3500 ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
3501 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
3502 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
3503 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen
3504 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3506 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false)
; 16-byte memcpy from an addrspace(1) source to an addrspace(5) destination, both align 1.
; Expected code: one global_load_dwordx4, then four buffer_store_dword at offsets 4, 0, 12, 8.
3510 define void @memcpy_p5_p1_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
3511 ; CHECK-LABEL: memcpy_p5_p1_sz16_align_1_1:
3512 ; CHECK: ; %bb.0: ; %entry
3513 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3514 ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
3515 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3516 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3517 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
3518 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3519 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3520 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3522 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false)
; 31-byte memcpy from an addrspace(1) source to an addrspace(5) destination, both align 1.
; Expected code: global loads of three dwords (offset 16), a short (offset 28), a byte
; (offset 30) and four dwords (offset 0), written back at the same offsets with
; buffer dword/short/byte stores.
3526 define void @memcpy_p5_p1_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
3527 ; CHECK-LABEL: memcpy_p5_p1_sz31_align_1_1:
3528 ; CHECK: ; %bb.0: ; %entry
3529 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3530 ; CHECK-NEXT: s_clause 0x3
3531 ; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16
3532 ; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28
3533 ; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30
3534 ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
3535 ; CHECK-NEXT: s_waitcnt vmcnt(3)
3536 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
3537 ; CHECK-NEXT: s_waitcnt vmcnt(2)
3538 ; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28
3539 ; CHECK-NEXT: s_waitcnt vmcnt(1)
3540 ; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
3541 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3542 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3543 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
3544 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3545 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3546 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
3547 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
3548 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3550 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false)
; 32-byte memcpy from an addrspace(1) source to an addrspace(5) destination, both align 1.
; Expected code: two global_load_dwordx4 (offsets 0 and 16), then eight buffer_store_dword
; covering offsets 0 through 28.
3554 define void @memcpy_p5_p1_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
3555 ; CHECK-LABEL: memcpy_p5_p1_sz32_align_1_1:
3556 ; CHECK: ; %bb.0: ; %entry
3557 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3558 ; CHECK-NEXT: s_clause 0x1
3559 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
3560 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
3561 ; CHECK-NEXT: s_waitcnt vmcnt(1)
3562 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
3563 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
3564 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
3565 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
3566 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3567 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
3568 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
3569 ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
3570 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
3571 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3573 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false)
; 16-byte memcpy from an addrspace(1) source to an addrspace(5) destination, both align 2.
; Expected code: one global_load_dwordx4, then four buffer_store_dword at offsets 4, 0, 12, 8.
3577 define void @memcpy_p5_p1_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
3578 ; CHECK-LABEL: memcpy_p5_p1_sz16_align_2_2:
3579 ; CHECK: ; %bb.0: ; %entry
3580 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3581 ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
3582 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3583 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3584 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
3585 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3586 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3587 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3589 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false)
; 31-byte memcpy from an addrspace(1) source to an addrspace(5) destination, both align 2.
; Expected code: global loads of three dwords (offset 16), a short (offset 28), a byte
; (offset 30) and four dwords (offset 0), written back at the same offsets with
; buffer dword/short/byte stores.
3593 define void @memcpy_p5_p1_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
3594 ; CHECK-LABEL: memcpy_p5_p1_sz31_align_2_2:
3595 ; CHECK: ; %bb.0: ; %entry
3596 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3597 ; CHECK-NEXT: s_clause 0x3
3598 ; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16
3599 ; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28
3600 ; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30
3601 ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
3602 ; CHECK-NEXT: s_waitcnt vmcnt(3)
3603 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
3604 ; CHECK-NEXT: s_waitcnt vmcnt(2)
3605 ; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28
3606 ; CHECK-NEXT: s_waitcnt vmcnt(1)
3607 ; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
3608 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3609 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3610 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
3611 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3612 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3613 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
3614 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
3615 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3617 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false)
; 32-byte memcpy from an addrspace(1) source to an addrspace(5) destination, both align 2.
; Expected code: two global_load_dwordx4 (offsets 0 and 16), then eight buffer_store_dword
; covering offsets 0 through 28.
3621 define void @memcpy_p5_p1_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
3622 ; CHECK-LABEL: memcpy_p5_p1_sz32_align_2_2:
3623 ; CHECK: ; %bb.0: ; %entry
3624 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3625 ; CHECK-NEXT: s_clause 0x1
3626 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
3627 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
3628 ; CHECK-NEXT: s_waitcnt vmcnt(1)
3629 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
3630 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
3631 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
3632 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
3633 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3634 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
3635 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
3636 ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
3637 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
3638 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3640 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false)
; 16-byte memcpy from an addrspace(1) source to an addrspace(5) destination, both align 8.
; Expected code: one global_load_dwordx4, then four buffer_store_dword at offsets 12, 8, 4, 0.
3644 define void @memcpy_p5_p1_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
3645 ; CHECK-LABEL: memcpy_p5_p1_sz16_align_8_8:
3646 ; CHECK: ; %bb.0: ; %entry
3647 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3648 ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
3649 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3650 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3651 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3652 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3653 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
3654 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3656 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false)
; 31-byte memcpy from an addrspace(1) source to an addrspace(5) destination, both align 8.
; Expected code: two global_load_dwordx4 at offsets 0 and 15. The byte ranges [0,16) and
; [15,31) overlap at byte 15, so two 16-byte accesses cover all 31 bytes. The data is
; written with dword stores at offsets 12/8/4/0 and 27/23/19/15.
3660 define void @memcpy_p5_p1_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
3661 ; CHECK-LABEL: memcpy_p5_p1_sz31_align_8_8:
3662 ; CHECK: ; %bb.0: ; %entry
3663 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3664 ; CHECK-NEXT: s_clause 0x1
3665 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
3666 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
3667 ; CHECK-NEXT: s_waitcnt vmcnt(1)
3668 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
3669 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
3670 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
3671 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
3672 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3673 ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:27
3674 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:23
3675 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:19
3676 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:15
3677 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3679 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false)
; 32-byte memcpy from an addrspace(1) source to an addrspace(5) destination, both align 8.
; Expected code: two global_load_dwordx4 (offsets 0 and 16), then eight buffer_store_dword
; covering offsets 0 through 28.
3683 define void @memcpy_p5_p1_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
3684 ; CHECK-LABEL: memcpy_p5_p1_sz32_align_8_8:
3685 ; CHECK: ; %bb.0: ; %entry
3686 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3687 ; CHECK-NEXT: s_clause 0x1
3688 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
3689 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
3690 ; CHECK-NEXT: s_waitcnt vmcnt(1)
3691 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
3692 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
3693 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
3694 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
3695 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3696 ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
3697 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
3698 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
3699 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
3700 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3702 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false)
; 16-byte memcpy from an addrspace(1) source to an addrspace(5) destination, both align 16.
; Expected code: one global_load_dwordx4, then four buffer_store_dword at offsets 12, 8, 4, 0.
3706 define void @memcpy_p5_p1_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
3707 ; CHECK-LABEL: memcpy_p5_p1_sz16_align_16_16:
3708 ; CHECK: ; %bb.0: ; %entry
3709 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3710 ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
3711 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3712 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3713 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3714 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3715 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
3716 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3718 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false)
; 31-byte memcpy from an addrspace(1) source to an addrspace(5) destination, both align 16.
; Expected code: two global_load_dwordx4 at offsets 0 and 15. The byte ranges [0,16) and
; [15,31) overlap at byte 15, so two 16-byte accesses cover all 31 bytes. The data is
; written with dword stores at offsets 12/8/4/0 and 27/23/19/15.
3722 define void @memcpy_p5_p1_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
3723 ; CHECK-LABEL: memcpy_p5_p1_sz31_align_16_16:
3724 ; CHECK: ; %bb.0: ; %entry
3725 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3726 ; CHECK-NEXT: s_clause 0x1
3727 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
3728 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
3729 ; CHECK-NEXT: s_waitcnt vmcnt(1)
3730 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
3731 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
3732 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
3733 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
3734 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3735 ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:27
3736 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:23
3737 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:19
3738 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:15
3739 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3741 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false)
; 32-byte memcpy from an addrspace(1) source to an addrspace(5) destination, both align 16.
; Expected code: two global_load_dwordx4 (offsets 0 and 16), then eight buffer_store_dword
; covering offsets 0 through 28.
3745 define void @memcpy_p5_p1_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
3746 ; CHECK-LABEL: memcpy_p5_p1_sz32_align_16_16:
3747 ; CHECK: ; %bb.0: ; %entry
3748 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3749 ; CHECK-NEXT: s_clause 0x1
3750 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
3751 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
3752 ; CHECK-NEXT: s_waitcnt vmcnt(1)
3753 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
3754 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
3755 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
3756 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
3757 ; CHECK-NEXT: s_waitcnt vmcnt(0)
3758 ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
3759 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
3760 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
3761 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
3762 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3764 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false)
; 16-byte memcpy from an addrspace(3) source to an addrspace(5) destination, both align 1.
; Expected code: one ds_read2_b64 into v[1:4], then four buffer_store_dword at
; offsets 4, 0, 12, 8.
3768 define void @memcpy_p5_p3_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
3769 ; CHECK-LABEL: memcpy_p5_p3_sz16_align_1_1:
3770 ; CHECK: ; %bb.0: ; %entry
3771 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3772 ; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
3773 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3774 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3775 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
3776 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3777 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3778 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3780 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false)
; 31-byte memcpy from an addrspace(3) source to an addrspace(5) destination, both align 1.
; Expected code: ds_read_b32 (offset 24), ds_read_u16 (offset 28), ds_read_u8 (offset 30),
; one ds_read2_b64 and one ds_read_b64 (offset 16); the loaded registers are then written
; with buffer dword/short/byte stores at offsets 0 through 30.
3784 define void @memcpy_p5_p3_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
3785 ; CHECK-LABEL: memcpy_p5_p3_sz31_align_1_1:
3786 ; CHECK: ; %bb.0: ; %entry
3787 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3788 ; CHECK-NEXT: ds_read_b32 v8, v1 offset:24
3789 ; CHECK-NEXT: ds_read_u16 v9, v1 offset:28
3790 ; CHECK-NEXT: ds_read_u8 v10, v1 offset:30
3791 ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1
3792 ; CHECK-NEXT: ds_read_b64 v[6:7], v1 offset:16
3793 ; CHECK-NEXT: s_waitcnt lgkmcnt(4)
3794 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
3795 ; CHECK-NEXT: s_waitcnt lgkmcnt(3)
3796 ; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28
3797 ; CHECK-NEXT: s_waitcnt lgkmcnt(2)
3798 ; CHECK-NEXT: buffer_store_byte v10, v0, s[0:3], 0 offen offset:30
3799 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
3800 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
3801 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
3802 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
3803 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
3804 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3805 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
3806 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
3807 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3809 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false)
; 32-byte memcpy from an addrspace(3) source to an addrspace(5) destination, both align 1.
; Expected code: two ds_read2_b64 (into v[2:5] and v[6:9]), then eight buffer_store_dword
; covering offsets 0 through 28.
3813 define void @memcpy_p5_p3_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
3814 ; CHECK-LABEL: memcpy_p5_p3_sz32_align_1_1:
3815 ; CHECK: ; %bb.0: ; %entry
3816 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3817 ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1
3818 ; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset0:2 offset1:3
3819 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
3820 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
3821 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
3822 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
3823 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
3824 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3825 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
3826 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
3827 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:28
3828 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
3829 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3831 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false)
; 16-byte memcpy from an addrspace(3) source to an addrspace(5) destination, both align 2.
; Expected code: one ds_read2_b64 into v[1:4], then four buffer_store_dword at
; offsets 4, 0, 12, 8.
3835 define void @memcpy_p5_p3_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
3836 ; CHECK-LABEL: memcpy_p5_p3_sz16_align_2_2:
3837 ; CHECK: ; %bb.0: ; %entry
3838 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3839 ; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
3840 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3841 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3842 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
3843 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3844 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3845 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3847 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false)
; 31-byte memcpy from an addrspace(3) source to an addrspace(5) destination, both align 2.
; Expected code: ds_read_b32 (offset 24), ds_read_u16 (offset 28), ds_read_u8 (offset 30),
; one ds_read2_b64 and one ds_read_b64 (offset 16); the loaded registers are then written
; with buffer dword/short/byte stores at offsets 0 through 30.
3851 define void @memcpy_p5_p3_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
3852 ; CHECK-LABEL: memcpy_p5_p3_sz31_align_2_2:
3853 ; CHECK: ; %bb.0: ; %entry
3854 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3855 ; CHECK-NEXT: ds_read_b32 v8, v1 offset:24
3856 ; CHECK-NEXT: ds_read_u16 v9, v1 offset:28
3857 ; CHECK-NEXT: ds_read_u8 v10, v1 offset:30
3858 ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1
3859 ; CHECK-NEXT: ds_read_b64 v[6:7], v1 offset:16
3860 ; CHECK-NEXT: s_waitcnt lgkmcnt(4)
3861 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
3862 ; CHECK-NEXT: s_waitcnt lgkmcnt(3)
3863 ; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28
3864 ; CHECK-NEXT: s_waitcnt lgkmcnt(2)
3865 ; CHECK-NEXT: buffer_store_byte v10, v0, s[0:3], 0 offen offset:30
3866 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
3867 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
3868 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
3869 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
3870 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
3871 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3872 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
3873 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
3874 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3876 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false)
; 32-byte memcpy from an addrspace(3) source to an addrspace(5) destination, both align 2.
; Expected code: two ds_read2_b64 (into v[2:5] and v[6:9]), then eight buffer_store_dword
; covering offsets 0 through 28.
3880 define void @memcpy_p5_p3_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
3881 ; CHECK-LABEL: memcpy_p5_p3_sz32_align_2_2:
3882 ; CHECK: ; %bb.0: ; %entry
3883 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3884 ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1
3885 ; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset0:2 offset1:3
3886 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
3887 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
3888 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
3889 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
3890 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
3891 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3892 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
3893 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
3894 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:28
3895 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
3896 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3898 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false)
; memcpy of 16 bytes from addrspace(3) to addrspace(5); both pointers align 8.
; Expected gfx1030 output: one ds_read2_b64 load, then four buffer_store_dword
; stores emitted from offset 12 down to 0.
3902 define void @memcpy_p5_p3_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
3903 ; CHECK-LABEL: memcpy_p5_p3_sz16_align_8_8:
3904 ; CHECK: ; %bb.0: ; %entry
3905 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3906 ; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
3907 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3908 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3909 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3910 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3911 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
3912 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3914 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false)
; memcpy of 31 bytes from addrspace(3) to addrspace(5); both pointers align 8.
; Expected gfx1030 output: a ds_read2_b64 plus a ds_read_b128 at offset 15, with
; dword stores at offsets 0-12 and 15-27. NOTE(review): the second piece starts
; at 15 (15 + 16 = 31), so it appears to overlap the first piece by one byte
; rather than using a short/byte tail.
3918 define void @memcpy_p5_p3_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
3919 ; CHECK-LABEL: memcpy_p5_p3_sz31_align_8_8:
3920 ; CHECK: ; %bb.0: ; %entry
3921 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3922 ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1
3923 ; CHECK-NEXT: ds_read_b128 v[6:9], v1 offset:15
3924 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
3925 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
3926 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
3927 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
3928 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
3929 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3930 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:27
3931 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:23
3932 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:19
3933 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:15
3934 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3936 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false)
; memcpy of 32 bytes from addrspace(3) to addrspace(5); both pointers align 8.
; Expected gfx1030 output: two ds_read2_b64 loads, then eight buffer_store_dword
; stores; each group of four is emitted from its highest offset downwards.
3940 define void @memcpy_p5_p3_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
3941 ; CHECK-LABEL: memcpy_p5_p3_sz32_align_8_8:
3942 ; CHECK: ; %bb.0: ; %entry
3943 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3944 ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1
3945 ; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset0:2 offset1:3
3946 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
3947 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
3948 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
3949 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
3950 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
3951 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3952 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:28
3953 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
3954 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
3955 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
3956 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3958 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false)
; memcpy of 16 bytes from addrspace(3) to addrspace(5); both pointers align 16.
; Expected gfx1030 output: a single ds_read_b128 load, then four
; buffer_store_dword stores emitted from offset 12 down to 0.
3962 define void @memcpy_p5_p3_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
3963 ; CHECK-LABEL: memcpy_p5_p3_sz16_align_16_16:
3964 ; CHECK: ; %bb.0: ; %entry
3965 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3966 ; CHECK-NEXT: ds_read_b128 v[1:4], v1
3967 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3968 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3969 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3970 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3971 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
3972 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3974 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false)
; memcpy of 31 bytes from addrspace(3) to addrspace(5); both pointers align 16.
; Expected gfx1030 output: two ds_read_b128 loads (offset 0 and offset 15), with
; dword stores at offsets 0-12 and 15-27. NOTE(review): the second piece starts
; at 15 (15 + 16 = 31), so it appears to overlap the first piece by one byte.
3978 define void @memcpy_p5_p3_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
3979 ; CHECK-LABEL: memcpy_p5_p3_sz31_align_16_16:
3980 ; CHECK: ; %bb.0: ; %entry
3981 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3982 ; CHECK-NEXT: ds_read_b128 v[2:5], v1
3983 ; CHECK-NEXT: ds_read_b128 v[6:9], v1 offset:15
3984 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
3985 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
3986 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
3987 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
3988 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
3989 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
3990 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:27
3991 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:23
3992 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:19
3993 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:15
3994 ; CHECK-NEXT: s_setpc_b64 s[30:31]
3996 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false)
; memcpy of 32 bytes from addrspace(3) to addrspace(5); both pointers align 16.
; Expected gfx1030 output: two ds_read_b128 loads (offset 0 and offset 16), then
; eight buffer_store_dword stores covering offsets 0-28.
4000 define void @memcpy_p5_p3_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
4001 ; CHECK-LABEL: memcpy_p5_p3_sz32_align_16_16:
4002 ; CHECK: ; %bb.0: ; %entry
4003 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4004 ; CHECK-NEXT: ds_read_b128 v[2:5], v1
4005 ; CHECK-NEXT: ds_read_b128 v[6:9], v1 offset:16
4006 ; CHECK-NEXT: s_waitcnt lgkmcnt(1)
4007 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
4008 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
4009 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
4010 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
4011 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
4012 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:28
4013 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
4014 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
4015 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
4016 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4018 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false)
; memcpy of 16 bytes from addrspace(4) to addrspace(5); both pointers align 1.
; Expected gfx1030 output: one global_load_dwordx4 despite align 1 on the
; source, then four buffer_store_dword stores (offsets 4, 0, 12, 8).
4022 define void @memcpy_p5_p4_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
4023 ; CHECK-LABEL: memcpy_p5_p4_sz16_align_1_1:
4024 ; CHECK: ; %bb.0: ; %entry
4025 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4026 ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
4027 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4028 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4029 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
4030 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4031 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4032 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4034 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
; memcpy of 31 bytes from addrspace(4) to addrspace(5); both pointers align 1.
; Expected gfx1030 output: four global loads under one s_clause (dwordx3@16,
; ushort@28, ubyte@30, dwordx4@0). The tail is stored as dword@24, short@28 and
; byte@30; offsets 0-20 use dword stores.
4038 define void @memcpy_p5_p4_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
4039 ; CHECK-LABEL: memcpy_p5_p4_sz31_align_1_1:
4040 ; CHECK: ; %bb.0: ; %entry
4041 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4042 ; CHECK-NEXT: s_clause 0x3
4043 ; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16
4044 ; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28
4045 ; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30
4046 ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
4047 ; CHECK-NEXT: s_waitcnt vmcnt(3)
4048 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
4049 ; CHECK-NEXT: s_waitcnt vmcnt(2)
4050 ; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28
4051 ; CHECK-NEXT: s_waitcnt vmcnt(1)
4052 ; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
4053 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4054 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4055 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
4056 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4057 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4058 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
4059 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
4060 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4062 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false)
; memcpy of 32 bytes from addrspace(4) to addrspace(5); both pointers align 1.
; Expected gfx1030 output: two global_load_dwordx4 loads under one s_clause
; (offset 0 and offset 16), then eight buffer_store_dword stores.
4066 define void @memcpy_p5_p4_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
4067 ; CHECK-LABEL: memcpy_p5_p4_sz32_align_1_1:
4068 ; CHECK: ; %bb.0: ; %entry
4069 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4070 ; CHECK-NEXT: s_clause 0x1
4071 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
4072 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
4073 ; CHECK-NEXT: s_waitcnt vmcnt(1)
4074 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
4075 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
4076 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
4077 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
4078 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4079 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
4080 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
4081 ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
4082 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
4083 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4085 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false)
; memcpy of 16 bytes from addrspace(4) to addrspace(5); both pointers align 2.
; Expected gfx1030 output: one global_load_dwordx4, then four
; buffer_store_dword stores (offsets 4, 0, 12, 8).
4089 define void @memcpy_p5_p4_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
4090 ; CHECK-LABEL: memcpy_p5_p4_sz16_align_2_2:
4091 ; CHECK: ; %bb.0: ; %entry
4092 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4093 ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
4094 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4095 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4096 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
4097 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4098 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4099 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4101 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false)
; memcpy of 31 bytes from addrspace(4) to addrspace(5); both pointers align 2.
; Expected gfx1030 output: four global loads under one s_clause (dwordx3@16,
; ushort@28, ubyte@30, dwordx4@0). The tail is stored as dword@24, short@28 and
; byte@30; offsets 0-20 use dword stores.
4105 define void @memcpy_p5_p4_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
4106 ; CHECK-LABEL: memcpy_p5_p4_sz31_align_2_2:
4107 ; CHECK: ; %bb.0: ; %entry
4108 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4109 ; CHECK-NEXT: s_clause 0x3
4110 ; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16
4111 ; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28
4112 ; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30
4113 ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
4114 ; CHECK-NEXT: s_waitcnt vmcnt(3)
4115 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
4116 ; CHECK-NEXT: s_waitcnt vmcnt(2)
4117 ; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28
4118 ; CHECK-NEXT: s_waitcnt vmcnt(1)
4119 ; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
4120 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4121 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4122 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
4123 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4124 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4125 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
4126 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
4127 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4129 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false)
; memcpy of 32 bytes from addrspace(4) to addrspace(5); both pointers align 2.
; Expected gfx1030 output: two global_load_dwordx4 loads under one s_clause
; (offset 0 and offset 16), then eight buffer_store_dword stores.
4133 define void @memcpy_p5_p4_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
4134 ; CHECK-LABEL: memcpy_p5_p4_sz32_align_2_2:
4135 ; CHECK: ; %bb.0: ; %entry
4136 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4137 ; CHECK-NEXT: s_clause 0x1
4138 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
4139 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
4140 ; CHECK-NEXT: s_waitcnt vmcnt(1)
4141 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
4142 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
4143 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
4144 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
4145 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4146 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
4147 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
4148 ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
4149 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
4150 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4152 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false)
; memcpy of 16 bytes from addrspace(4) to addrspace(5); both pointers align 8.
; Expected gfx1030 output: one global_load_dwordx4, then four
; buffer_store_dword stores emitted from offset 12 down to 0.
4156 define void @memcpy_p5_p4_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
4157 ; CHECK-LABEL: memcpy_p5_p4_sz16_align_8_8:
4158 ; CHECK: ; %bb.0: ; %entry
4159 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4160 ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
4161 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4162 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4163 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4164 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4165 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
4166 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4168 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false)
; memcpy of 31 bytes from addrspace(4) to addrspace(5); both pointers align 8.
; Expected gfx1030 output: two global_load_dwordx4 loads (offset 0 and offset
; 15), with dword stores at offsets 0-12 and 15-27. NOTE(review): the second
; piece starts at 15 (15 + 16 = 31), so it appears to overlap the first piece
; by one byte rather than using a short/byte tail.
4172 define void @memcpy_p5_p4_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
4173 ; CHECK-LABEL: memcpy_p5_p4_sz31_align_8_8:
4174 ; CHECK: ; %bb.0: ; %entry
4175 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4176 ; CHECK-NEXT: s_clause 0x1
4177 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
4178 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
4179 ; CHECK-NEXT: s_waitcnt vmcnt(1)
4180 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
4181 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
4182 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
4183 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
4184 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4185 ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:27
4186 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:23
4187 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:19
4188 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:15
4189 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4191 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false)
; memcpy of 32 bytes from addrspace(4) to addrspace(5); both pointers align 8.
; Expected gfx1030 output: two global_load_dwordx4 loads (offset 0 and offset
; 16), then eight buffer_store_dword stores; each group of four is emitted from
; its highest offset downwards.
4195 define void @memcpy_p5_p4_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
4196 ; CHECK-LABEL: memcpy_p5_p4_sz32_align_8_8:
4197 ; CHECK: ; %bb.0: ; %entry
4198 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4199 ; CHECK-NEXT: s_clause 0x1
4200 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
4201 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
4202 ; CHECK-NEXT: s_waitcnt vmcnt(1)
4203 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
4204 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
4205 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
4206 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
4207 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4208 ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
4209 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
4210 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
4211 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
4212 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4214 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false)
; memcpy of 16 bytes from addrspace(4) to addrspace(5); both pointers align 16.
; Expected gfx1030 output: one global_load_dwordx4, then four
; buffer_store_dword stores emitted from offset 12 down to 0.
4218 define void @memcpy_p5_p4_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
4219 ; CHECK-LABEL: memcpy_p5_p4_sz16_align_16_16:
4220 ; CHECK: ; %bb.0: ; %entry
4221 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4222 ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
4223 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4224 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4225 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4226 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4227 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
4228 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4230 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false)
; memcpy of 31 bytes from addrspace(4) to addrspace(5); both pointers align 16.
; Expected gfx1030 output: two global_load_dwordx4 loads (offset 0 and offset
; 15), with dword stores at offsets 0-12 and 15-27. NOTE(review): the second
; piece starts at 15 (15 + 16 = 31), so it appears to overlap the first piece
; by one byte.
4234 define void @memcpy_p5_p4_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
4235 ; CHECK-LABEL: memcpy_p5_p4_sz31_align_16_16:
4236 ; CHECK: ; %bb.0: ; %entry
4237 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4238 ; CHECK-NEXT: s_clause 0x1
4239 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
4240 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
4241 ; CHECK-NEXT: s_waitcnt vmcnt(1)
4242 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
4243 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
4244 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
4245 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
4246 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4247 ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:27
4248 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:23
4249 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:19
4250 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:15
4251 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4253 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false)
; memcpy of 32 bytes from addrspace(4) to addrspace(5); both pointers align 16.
; Expected gfx1030 output: two global_load_dwordx4 loads (offset 0 and offset
; 16), then eight buffer_store_dword stores covering offsets 0-28.
4257 define void @memcpy_p5_p4_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
4258 ; CHECK-LABEL: memcpy_p5_p4_sz32_align_16_16:
4259 ; CHECK: ; %bb.0: ; %entry
4260 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4261 ; CHECK-NEXT: s_clause 0x1
4262 ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
4263 ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
4264 ; CHECK-NEXT: s_waitcnt vmcnt(1)
4265 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
4266 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
4267 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
4268 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
4269 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4270 ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
4271 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
4272 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
4273 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
4274 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4276 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false)
; memcpy of 16 bytes within addrspace(5) (source and destination); both
; pointers align 1. Expected gfx1030 output: four buffer_load_dword loads under
; one s_clause, each paired with a buffer_store_dword after its own vmcnt wait.
4280 define void @memcpy_p5_p5_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
4281 ; CHECK-LABEL: memcpy_p5_p5_sz16_align_1_1:
4282 ; CHECK: ; %bb.0: ; %entry
4283 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4284 ; CHECK-NEXT: s_clause 0x3
4285 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:8
4286 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:12
4287 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen
4288 ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4
4289 ; CHECK-NEXT: s_waitcnt vmcnt(3)
4290 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:8
4291 ; CHECK-NEXT: s_waitcnt vmcnt(2)
4292 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:12
4293 ; CHECK-NEXT: s_waitcnt vmcnt(1)
4294 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen
4295 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4296 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
4297 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4299 tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false)
; memcpy of 31 bytes within addrspace(5) (source and destination); both
; pointers align 1. Expected gfx1030 output: nine buffer loads under one
; s_clause (ushort@28, seven dwords covering 0-27, ubyte@30), each followed by
; the matching buffer store after its own vmcnt wait; no overlapping accesses.
4303 define void @memcpy_p5_p5_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
4304 ; CHECK-LABEL: memcpy_p5_p5_sz31_align_1_1:
4305 ; CHECK: ; %bb.0: ; %entry
4306 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4307 ; CHECK-NEXT: s_clause 0x8
4308 ; CHECK-NEXT: buffer_load_ushort v2, v1, s[0:3], 0 offen offset:28
4309 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:24
4310 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:16
4311 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:20
4312 ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8
4313 ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12
4314 ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen
4315 ; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:4
4316 ; CHECK-NEXT: buffer_load_ubyte v1, v1, s[0:3], 0 offen offset:30
4317 ; CHECK-NEXT: s_waitcnt vmcnt(8)
4318 ; CHECK-NEXT: buffer_store_short v2, v0, s[0:3], 0 offen offset:28
4319 ; CHECK-NEXT: s_waitcnt vmcnt(7)
4320 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:24
4321 ; CHECK-NEXT: s_waitcnt vmcnt(6)
4322 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16
4323 ; CHECK-NEXT: s_waitcnt vmcnt(5)
4324 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20
4325 ; CHECK-NEXT: s_waitcnt vmcnt(4)
4326 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8
4327 ; CHECK-NEXT: s_waitcnt vmcnt(3)
4328 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12
4329 ; CHECK-NEXT: s_waitcnt vmcnt(2)
4330 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen
4331 ; CHECK-NEXT: s_waitcnt vmcnt(1)
4332 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:4
4333 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4334 ; CHECK-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen offset:30
4335 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4337 tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false)
; memcpy of 32 bytes within addrspace(5) (source and destination); both
; pointers align 1. Expected gfx1030 output: eight buffer_load_dword loads
; under one s_clause, each paired with a buffer_store_dword after its own
; vmcnt wait.
4341 define void @memcpy_p5_p5_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
4342 ; CHECK-LABEL: memcpy_p5_p5_sz32_align_1_1:
4343 ; CHECK: ; %bb.0: ; %entry
4344 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4345 ; CHECK-NEXT: s_clause 0x7
4346 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:24
4347 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:28
4348 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:16
4349 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:20
4350 ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8
4351 ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12
4352 ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen
4353 ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4
4354 ; CHECK-NEXT: s_waitcnt vmcnt(7)
4355 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:24
4356 ; CHECK-NEXT: s_waitcnt vmcnt(6)
4357 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:28
4358 ; CHECK-NEXT: s_waitcnt vmcnt(5)
4359 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16
4360 ; CHECK-NEXT: s_waitcnt vmcnt(4)
4361 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20
4362 ; CHECK-NEXT: s_waitcnt vmcnt(3)
4363 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8
4364 ; CHECK-NEXT: s_waitcnt vmcnt(2)
4365 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12
4366 ; CHECK-NEXT: s_waitcnt vmcnt(1)
4367 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen
4368 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4369 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
4370 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4372 tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false)
; memcpy of 16 bytes within addrspace(5) (source and destination); both
; pointers align 2. Expected gfx1030 output: four buffer_load_dword loads under
; one s_clause, each paired with a buffer_store_dword after its own vmcnt wait.
4376 define void @memcpy_p5_p5_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
4377 ; CHECK-LABEL: memcpy_p5_p5_sz16_align_2_2:
4378 ; CHECK: ; %bb.0: ; %entry
4379 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4380 ; CHECK-NEXT: s_clause 0x3
4381 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:8
4382 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:12
4383 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen
4384 ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4
4385 ; CHECK-NEXT: s_waitcnt vmcnt(3)
4386 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:8
4387 ; CHECK-NEXT: s_waitcnt vmcnt(2)
4388 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:12
4389 ; CHECK-NEXT: s_waitcnt vmcnt(1)
4390 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen
4391 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4392 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
4393 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4395 tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false)
; memcpy of 31 bytes within addrspace(5) (source and destination); both
; pointers align 2. Expected gfx1030 output: nine buffer loads under one
; s_clause (ushort@28, seven dwords covering 0-27, ubyte@30), each followed by
; the matching buffer store after its own vmcnt wait; no overlapping accesses.
4399 define void @memcpy_p5_p5_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
4400 ; CHECK-LABEL: memcpy_p5_p5_sz31_align_2_2:
4401 ; CHECK: ; %bb.0: ; %entry
4402 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4403 ; CHECK-NEXT: s_clause 0x8
4404 ; CHECK-NEXT: buffer_load_ushort v2, v1, s[0:3], 0 offen offset:28
4405 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:24
4406 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:16
4407 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:20
4408 ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8
4409 ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12
4410 ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen
4411 ; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:4
4412 ; CHECK-NEXT: buffer_load_ubyte v1, v1, s[0:3], 0 offen offset:30
4413 ; CHECK-NEXT: s_waitcnt vmcnt(8)
4414 ; CHECK-NEXT: buffer_store_short v2, v0, s[0:3], 0 offen offset:28
4415 ; CHECK-NEXT: s_waitcnt vmcnt(7)
4416 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:24
4417 ; CHECK-NEXT: s_waitcnt vmcnt(6)
4418 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16
4419 ; CHECK-NEXT: s_waitcnt vmcnt(5)
4420 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20
4421 ; CHECK-NEXT: s_waitcnt vmcnt(4)
4422 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8
4423 ; CHECK-NEXT: s_waitcnt vmcnt(3)
4424 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12
4425 ; CHECK-NEXT: s_waitcnt vmcnt(2)
4426 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen
4427 ; CHECK-NEXT: s_waitcnt vmcnt(1)
4428 ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:4
4429 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4430 ; CHECK-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen offset:30
4431 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4433 tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false)
; memcpy of 32 bytes within addrspace(5) (source and destination); both
; pointers align 2. Expected gfx1030 output: eight buffer_load_dword loads
; under one s_clause, each paired with a buffer_store_dword after its own
; vmcnt wait.
4437 define void @memcpy_p5_p5_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
4438 ; CHECK-LABEL: memcpy_p5_p5_sz32_align_2_2:
4439 ; CHECK: ; %bb.0: ; %entry
4440 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4441 ; CHECK-NEXT: s_clause 0x7
4442 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:24
4443 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:28
4444 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:16
4445 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:20
4446 ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8
4447 ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12
4448 ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen
4449 ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4
4450 ; CHECK-NEXT: s_waitcnt vmcnt(7)
4451 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:24
4452 ; CHECK-NEXT: s_waitcnt vmcnt(6)
4453 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:28
4454 ; CHECK-NEXT: s_waitcnt vmcnt(5)
4455 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16
4456 ; CHECK-NEXT: s_waitcnt vmcnt(4)
4457 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20
4458 ; CHECK-NEXT: s_waitcnt vmcnt(3)
4459 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8
4460 ; CHECK-NEXT: s_waitcnt vmcnt(2)
4461 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12
4462 ; CHECK-NEXT: s_waitcnt vmcnt(1)
4463 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen
4464 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4465 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
4466 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4468 tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false)
; memcpy of 16 bytes within addrspace(5) (source and destination); both
; pointers align 8. Expected gfx1030 output: four buffer_load_dword loads under
; one s_clause in ascending offset order (0, 4, 8, 12), each paired with a
; buffer_store_dword after its own vmcnt wait.
4472 define void @memcpy_p5_p5_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
4473 ; CHECK-LABEL: memcpy_p5_p5_sz16_align_8_8:
4474 ; CHECK: ; %bb.0: ; %entry
4475 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4476 ; CHECK-NEXT: s_clause 0x3
4477 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
4478 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
4479 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
4480 ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
4481 ; CHECK-NEXT: s_waitcnt vmcnt(3)
4482 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
4483 ; CHECK-NEXT: s_waitcnt vmcnt(2)
4484 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
4485 ; CHECK-NEXT: s_waitcnt vmcnt(1)
4486 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
4487 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4488 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
4489 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4491 tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false)
; Test case: 31-byte llvm.memcpy where both destination and source are
; addrspace(5) pointers declared with align 8.
; The generated assertions below expect eight buffer_load_dword instructions
; under s_clause 0x7 and eight matching buffer_store_dword instructions.
; The accesses form two groups of four dwords: offsets 15, 19, 23, 27 and
; offsets 0, 4, 8, 12. Together they span bytes 0..30, and byte 15 is covered
; by both groups (the dword at offset 12 and the dword at offset 15).
; Each store is preceded by an s_waitcnt vmcnt(N), N counting down from 7 to 0.
; The assertion lines are autogenerated; regenerate them with the update
; script instead of editing them by hand.
4495 define void @memcpy_p5_p5_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
4496 ; CHECK-LABEL: memcpy_p5_p5_sz31_align_8_8:
4497 ; CHECK: ; %bb.0: ; %entry
4498 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4499 ; CHECK-NEXT: s_clause 0x7
4500 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:15
4501 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:19
4502 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:23
4503 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:27
4504 ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen
4505 ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
4506 ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
4507 ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
4508 ; CHECK-NEXT: s_waitcnt vmcnt(7)
4509 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:15
4510 ; CHECK-NEXT: s_waitcnt vmcnt(6)
4511 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:19
4512 ; CHECK-NEXT: s_waitcnt vmcnt(5)
4513 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:23
4514 ; CHECK-NEXT: s_waitcnt vmcnt(4)
4515 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:27
4516 ; CHECK-NEXT: s_waitcnt vmcnt(3)
4517 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen
4518 ; CHECK-NEXT: s_waitcnt vmcnt(2)
4519 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
4520 ; CHECK-NEXT: s_waitcnt vmcnt(1)
4521 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
4522 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4523 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
4524 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4526 tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false)
; Test case: 32-byte llvm.memcpy where both destination and source are
; addrspace(5) pointers declared with align 8.
; The generated assertions below expect eight buffer_load_dword instructions
; under s_clause 0x7 and eight matching buffer_store_dword instructions.
; The accesses are emitted as offsets 16, 20, 24, 28 first, then 0, 4, 8, 12.
; Each store is preceded by an s_waitcnt vmcnt(N), N counting down from 7 to 0.
; The assertion lines are autogenerated; regenerate them with the update
; script instead of editing them by hand.
4530 define void @memcpy_p5_p5_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
4531 ; CHECK-LABEL: memcpy_p5_p5_sz32_align_8_8:
4532 ; CHECK: ; %bb.0: ; %entry
4533 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4534 ; CHECK-NEXT: s_clause 0x7
4535 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:16
4536 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:20
4537 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:24
4538 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:28
4539 ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen
4540 ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
4541 ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
4542 ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
4543 ; CHECK-NEXT: s_waitcnt vmcnt(7)
4544 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:16
4545 ; CHECK-NEXT: s_waitcnt vmcnt(6)
4546 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:20
4547 ; CHECK-NEXT: s_waitcnt vmcnt(5)
4548 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:24
4549 ; CHECK-NEXT: s_waitcnt vmcnt(4)
4550 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:28
4551 ; CHECK-NEXT: s_waitcnt vmcnt(3)
4552 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen
4553 ; CHECK-NEXT: s_waitcnt vmcnt(2)
4554 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
4555 ; CHECK-NEXT: s_waitcnt vmcnt(1)
4556 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
4557 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4558 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
4559 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4561 tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false)
; Test case: 16-byte llvm.memcpy where both destination and source are
; addrspace(5) pointers declared with align 16.
; The generated assertions below expect four buffer_load_dword instructions
; under s_clause 0x3 (offsets 0, 4, 8, 12), followed by four
; buffer_store_dword instructions to the same offsets in the same order.
; Each store is preceded by an s_waitcnt vmcnt(N), N counting down from 3 to 0.
; The assertion lines are autogenerated; regenerate them with the update
; script instead of editing them by hand.
4565 define void @memcpy_p5_p5_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
4566 ; CHECK-LABEL: memcpy_p5_p5_sz16_align_16_16:
4567 ; CHECK: ; %bb.0: ; %entry
4568 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4569 ; CHECK-NEXT: s_clause 0x3
4570 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
4571 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
4572 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
4573 ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
4574 ; CHECK-NEXT: s_waitcnt vmcnt(3)
4575 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
4576 ; CHECK-NEXT: s_waitcnt vmcnt(2)
4577 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
4578 ; CHECK-NEXT: s_waitcnt vmcnt(1)
4579 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
4580 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4581 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
4582 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4584 tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false)
; Test case: 31-byte llvm.memcpy where both destination and source are
; addrspace(5) pointers declared with align 16.
; The generated assertions below expect eight buffer_load_dword instructions
; under s_clause 0x7 and eight matching buffer_store_dword instructions.
; The accesses form two groups of four dwords: offsets 15, 19, 23, 27 and
; offsets 0, 4, 8, 12. Together they span bytes 0..30, and byte 15 is covered
; by both groups (the dword at offset 12 and the dword at offset 15).
; Each store is preceded by an s_waitcnt vmcnt(N), N counting down from 7 to 0.
; The assertion lines are autogenerated; regenerate them with the update
; script instead of editing them by hand.
4588 define void @memcpy_p5_p5_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
4589 ; CHECK-LABEL: memcpy_p5_p5_sz31_align_16_16:
4590 ; CHECK: ; %bb.0: ; %entry
4591 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4592 ; CHECK-NEXT: s_clause 0x7
4593 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:15
4594 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:19
4595 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:23
4596 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:27
4597 ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen
4598 ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
4599 ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
4600 ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
4601 ; CHECK-NEXT: s_waitcnt vmcnt(7)
4602 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:15
4603 ; CHECK-NEXT: s_waitcnt vmcnt(6)
4604 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:19
4605 ; CHECK-NEXT: s_waitcnt vmcnt(5)
4606 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:23
4607 ; CHECK-NEXT: s_waitcnt vmcnt(4)
4608 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:27
4609 ; CHECK-NEXT: s_waitcnt vmcnt(3)
4610 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen
4611 ; CHECK-NEXT: s_waitcnt vmcnt(2)
4612 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
4613 ; CHECK-NEXT: s_waitcnt vmcnt(1)
4614 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
4615 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4616 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
4617 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4619 tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false)
; Test case: 32-byte llvm.memcpy where both destination and source are
; addrspace(5) pointers declared with align 16.
; The generated assertions below expect eight buffer_load_dword instructions
; under s_clause 0x7 and eight matching buffer_store_dword instructions.
; The accesses are emitted as offsets 16, 20, 24, 28 first, then 0, 4, 8, 12.
; Each store is preceded by an s_waitcnt vmcnt(N), N counting down from 7 to 0.
; The assertion lines are autogenerated; regenerate them with the update
; script instead of editing them by hand.
4623 define void @memcpy_p5_p5_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
4624 ; CHECK-LABEL: memcpy_p5_p5_sz32_align_16_16:
4625 ; CHECK: ; %bb.0: ; %entry
4626 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4627 ; CHECK-NEXT: s_clause 0x7
4628 ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:16
4629 ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:20
4630 ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:24
4631 ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:28
4632 ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen
4633 ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
4634 ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
4635 ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
4636 ; CHECK-NEXT: s_waitcnt vmcnt(7)
4637 ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:16
4638 ; CHECK-NEXT: s_waitcnt vmcnt(6)
4639 ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:20
4640 ; CHECK-NEXT: s_waitcnt vmcnt(5)
4641 ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:24
4642 ; CHECK-NEXT: s_waitcnt vmcnt(4)
4643 ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:28
4644 ; CHECK-NEXT: s_waitcnt vmcnt(3)
4645 ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen
4646 ; CHECK-NEXT: s_waitcnt vmcnt(2)
4647 ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
4648 ; CHECK-NEXT: s_waitcnt vmcnt(1)
4649 ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
4650 ; CHECK-NEXT: s_waitcnt vmcnt(0)
4651 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
4652 ; CHECK-NEXT: s_setpc_b64 s[30:31]
4654 tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false)
; Declarations of the llvm.memcpy overloads used by the tests in this file:
; one per (destination, source) address-space pair, with destinations
; 0, 1, 3, 5 and sources 0, 1, 3, 4, 5 (20 overloads in total).
; Every declaration references attribute group #2, so the group definition at
; the end of this block must carry the same number for its attributes to be
; attached to these declarations.
4658 declare void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2
4659 declare void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2
4660 declare void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2
4661 declare void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2
4662 declare void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2
4663 declare void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2
4664 declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2
4665 declare void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2
4666 declare void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2
4667 declare void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2
4668 declare void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2
4669 declare void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2
4670 declare void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2
4671 declare void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2
4672 declare void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2
4673 declare void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2
4674 declare void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2
4675 declare void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2
4676 declare void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2
4677 declare void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2
; Attribute group shared by all memcpy declarations above. It was previously
; numbered #0, which none of the declarations reference.
; NOTE(review): confirm no function elsewhere in the file refers to #0.
4679 attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }