1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -o - %s | FileCheck %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -stop-after=finalize-isel -o - %s | FileCheck --check-prefix=MIR %s
4 ; Ensure that the scoped AA is attached on loads/stores lowered from mem ops.
; Re-evaluate the scope slot numbers, as that numbering may change from run to run.
8 ; MIR-DAG: ![[DOMAIN:[0-9]+]] = distinct !{!{{[0-9]+}}, !"bax"}
9 ; MIR-DAG: ![[SCOPE0:[0-9]+]] = distinct !{!{{[0-9]+}}, ![[DOMAIN]], !"bax: %p"}
10 ; MIR-DAG: ![[SCOPE1:[0-9]+]] = distinct !{!{{[0-9]+}}, ![[DOMAIN]], !"bax: %q"}
11 ; MIR-DAG: ![[SET0:[0-9]+]] = !{![[SCOPE0]]}
12 ; MIR-DAG: ![[SET1:[0-9]+]] = !{![[SCOPE1]]}
; MIR-LABEL: name: test_memcpy
; MIR: [[LOAD:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 %{{[0-9]+}}, 16, 0, implicit $exec :: (load (s128) from %ir.add.ptr, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
; MIR: GLOBAL_STORE_DWORDX4 %{{[0-9]+}}, killed [[LOAD]], 0, 0, implicit $exec :: (store (s128) into %ir.p, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
define i32 @test_memcpy(ptr addrspace(1) nocapture %p, ptr addrspace(1) nocapture readonly %q) {
; The 16-byte memcpy from %p+16 to %p is lowered to a single dwordx4
; load/store pair; the scoped-AA metadata on the call must be attached to
; both resulting machine memory operands (verified by the finalize-isel
; run above). With that metadata present, the independent loads of %q can
; legally be scheduled before the store belonging to the memcpy.
; Check loads of %q are scheduled ahead of that store of the memcpy on %p.
; CHECK-LABEL: test_memcpy:
; CHECK-DAG: global_load_dwordx2 v[[[Q0:[0-9]+]]:[[Q1:[0-9]+]]], v[2:3], off
; CHECK-DAG: global_load_dwordx4 [[PVAL:v\[[0-9]+:[0-9]+\]]], v[0:1], off offset:16
; CHECK-DAG: v_add_nc_u32_e32 v{{[0-9]+}}, v[[Q0]], v[[Q1]]
; CHECK: global_store_dwordx4 v[0:1], [[PVAL]], off
; CHECK: s_setpc_b64 s[30:31]
  ; %add.ptr = %p + 4 x i32 = %p + 16 bytes (the memcpy source)
  %add.ptr = getelementptr inbounds i32, ptr addrspace(1) %p, i64 4
  tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %p, ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %add.ptr, i64 16, i1 false), !alias.scope !2, !noalias !4
  ; Two loads of %q, in the opposite scope set, so they do not alias the
  ; memcpy accesses above.
  %v0 = load i32, ptr addrspace(1) %q, align 4, !alias.scope !4, !noalias !2
  %q1 = getelementptr inbounds i32, ptr addrspace(1) %q, i64 1
  %v1 = load i32, ptr addrspace(1) %q1, align 4, !alias.scope !4, !noalias !2
  %add = add i32 %v0, %v1
; MIR-LABEL: name: test_memcpy_inline
; MIR: [[LOAD:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 %{{[0-9]+}}, 16, 0, implicit $exec :: (load (s128) from %ir.add.ptr, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
; MIR: GLOBAL_STORE_DWORDX4 %{{[0-9]+}}, killed [[LOAD]], 0, 0, implicit $exec :: (store (s128) into %ir.p, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
define i32 @test_memcpy_inline(ptr addrspace(1) nocapture %p, ptr addrspace(1) nocapture readonly %q) {
; Same as test_memcpy above, but exercising the always-inlined
; memcpy.inline intrinsic: the scoped-AA metadata must likewise land on
; the dwordx4 load/store machine memory operands so the %q loads can be
; reordered past the store.
; Check loads of %q are scheduled ahead of that store of the memcpy on %p.
; CHECK-LABEL: test_memcpy_inline:
; CHECK-DAG: global_load_dwordx2 v[[[Q0:[0-9]+]]:[[Q1:[0-9]+]]], v[2:3], off
; CHECK-DAG: global_load_dwordx4 [[PVAL:v\[[0-9]+:[0-9]+\]]], v[0:1], off offset:16
; CHECK-DAG: v_add_nc_u32_e32 v{{[0-9]+}}, v[[Q0]], v[[Q1]]
; CHECK: global_store_dwordx4 v[0:1], [[PVAL]], off
; CHECK: s_setpc_b64 s[30:31]
  ; %add.ptr = %p + 16 bytes (the copy source)
  %add.ptr = getelementptr inbounds i32, ptr addrspace(1) %p, i64 4
  tail call void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %p, ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %add.ptr, i64 16, i1 false), !alias.scope !2, !noalias !4
  ; Loads of %q carry the complementary scope sets (no aliasing with the copy).
  %v0 = load i32, ptr addrspace(1) %q, align 4, !alias.scope !4, !noalias !2
  %q1 = getelementptr inbounds i32, ptr addrspace(1) %q, i64 1
  %v1 = load i32, ptr addrspace(1) %q1, align 4, !alias.scope !4, !noalias !2
  %add = add i32 %v0, %v1
; MIR-LABEL: name: test_memmove
; MIR: [[LOAD:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 %{{[0-9]+}}, 16, 0, implicit $exec :: (load (s128) from %ir.add.ptr, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
; MIR: GLOBAL_STORE_DWORDX4 %{{[0-9]+}}, killed [[LOAD]], 0, 0, implicit $exec :: (store (s128) into %ir.p, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
define i32 @test_memmove(ptr addrspace(1) nocapture %p, ptr addrspace(1) nocapture readonly %q) {
; memmove variant of the test: the 16-byte move also expands to one
; dwordx4 load/store pair, and the scoped-AA metadata must survive onto
; its machine memory operands so the %q loads may be hoisted above the
; store.
; Check loads of %q are scheduled ahead of that store of the memmove on %p.
; CHECK-LABEL: test_memmove:
; CHECK-DAG: global_load_dwordx2 v[[[Q0:[0-9]+]]:[[Q1:[0-9]+]]], v[2:3], off
; CHECK-DAG: global_load_dwordx4 [[PVAL:v\[[0-9]+:[0-9]+\]]], v[0:1], off offset:16
; CHECK-DAG: v_add_nc_u32_e32 v{{[0-9]+}}, v[[Q0]], v[[Q1]]
; CHECK: global_store_dwordx4 v[0:1], [[PVAL]]
; CHECK: s_setpc_b64 s[30:31]
  ; %add.ptr = %p + 16 bytes (the move source)
  %add.ptr = getelementptr inbounds i32, ptr addrspace(1) %p, i64 4
  tail call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %p, ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %add.ptr, i64 16, i1 false), !alias.scope !2, !noalias !4
  ; Loads of %q use the complementary scope sets (no aliasing with the move).
  %v0 = load i32, ptr addrspace(1) %q, align 4, !alias.scope !4, !noalias !2
  %q1 = getelementptr inbounds i32, ptr addrspace(1) %q, i64 1
  %v1 = load i32, ptr addrspace(1) %q1, align 4, !alias.scope !4, !noalias !2
  %add = add i32 %v0, %v1
; MIR-LABEL: name: test_memset
; MIR: GLOBAL_STORE_DWORDX4 killed %{{[0-9]+}}, killed %{{[0-9]+}}, 0, 0, implicit $exec :: (store (s128) into %ir.p, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1)
define i32 @test_memset(ptr addrspace(1) nocapture %p, ptr addrspace(1) nocapture readonly %q) {
; memset variant: filling 16 bytes with 0xAA becomes a single dwordx4
; store of the 0xaaaaaaaa splat. Only a store is produced, and it must
; carry the scoped-AA metadata so the loads of %q are known independent.
; Check loads of %q are scheduled ahead of that store of the memset on %p.
; CHECK-LABEL: test_memset:
; CHECK-DAG: global_load_dwordx2 v[[[Q0:[0-9]+]]:[[Q1:[0-9]+]]], v[2:3], off
; CHECK-DAG: v_mov_b32_e32 v[[PVAL:[0-9]+]], 0xaaaaaaaa
; CHECK: global_store_dwordx4 v[0:1], v[[[PVAL]]{{:[0-9]+\]}}, off
; CHECK: v_add_nc_u32_e32 v{{[0-9]+}}, v[[Q0]], v[[Q1]]
; CHECK: s_setpc_b64 s[30:31]
  ; i8 170 == 0xAA fill byte; 16 bytes total.
  tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef nonnull align 4 dereferenceable(16) %p, i8 170, i64 16, i1 false), !alias.scope !2, !noalias !4
  ; Loads of %q carry the complementary scope sets (no aliasing with the fill).
  %v0 = load i32, ptr addrspace(1) %q, align 4, !alias.scope !4, !noalias !2
  %q1 = getelementptr inbounds i32, ptr addrspace(1) %q, i64 1
  %v1 = load i32, ptr addrspace(1) %q1, align 4, !alias.scope !4, !noalias !2
  %add = add i32 %v0, %v1
92 declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg)
93 declare void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg)
94 declare void @llvm.memmove.p1.p1.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg)
95 declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture writeonly, i8, i64, i1 immarg)
; Scoped alias-analysis metadata: !0 is the distinct AA domain "bax";
; !1 and !3 are the scopes for %p and %q within that domain.
; NOTE(review): the scope lists !2 and !4 referenced by the accesses
; above are not visible in this chunk — presumably !2 = !{!1} and
; !4 = !{!3}, matching the single-scope SET0/SET1 patterns checked in
; the finalize-isel run; confirm against the full file.
!0 = distinct !{!0, !"bax"}
!1 = distinct !{!1, !0, !"bax: %p"}
!3 = distinct !{!3, !0, !"bax: %q"}