test/CodeGen/AMDGPU/memory-legalizer-store.ll

   1 ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
   2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
   3 ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
   4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
   5
   6 declare i32 @llvm.amdgcn.workitem.id.x() ; called by the *_1 nontemporal kernels; its i32 result is the getelementptr index applied to %out
   7
   8 ; GCN-LABEL: {{^}}system_unordered:
   9 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
  10 ; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
  11 define amdgpu_kernel void @system_unordered( ; kernel under test: atomic store, unordered ordering, no syncscope given
  12     i32 %in, i32* %out) {
  13 entry:
  14   store atomic i32 %in, i32* %out unordered, align 4 ; checks above forbid "s_waitcnt vmcnt(0)" between the label and the flat_store_dword
  15   ret void
  16 }
  17
  18 ; GCN-LABEL: {{^}}system_monotonic:
  19 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
  20 ; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
  21 define amdgpu_kernel void @system_monotonic( ; kernel under test: atomic store, monotonic ordering, no syncscope given
  22     i32 %in, i32* %out) {
  23 entry:
  24   store atomic i32 %in, i32* %out monotonic, align 4 ; checks above forbid "s_waitcnt vmcnt(0)" between the label and the flat_store_dword
  25   ret void
  26 }
  27
  28 ; GCN-LABEL: {{^}}system_release:
  29 ; GCN:        s_waitcnt vmcnt(0){{$}}
  30 ; GCN-NEXT:   flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
  31 define amdgpu_kernel void @system_release( ; kernel under test: atomic store, release ordering, no syncscope given
  32     i32 %in, i32* %out) {
  33 entry:
  34   store atomic i32 %in, i32* %out release, align 4 ; checks above require "s_waitcnt vmcnt(0)" on the line directly before the flat_store_dword
  35   ret void
  36 }
  37
  38 ; GCN-LABEL: {{^}}system_seq_cst:
  39 ; GCN:        s_waitcnt vmcnt(0){{$}}
  40 ; GCN-NEXT:   flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
  41 define amdgpu_kernel void @system_seq_cst( ; kernel under test: atomic store, seq_cst ordering, no syncscope given
  42     i32 %in, i32* %out) {
  43 entry:
  44   store atomic i32 %in, i32* %out seq_cst, align 4 ; checks above require "s_waitcnt vmcnt(0)" on the line directly before the flat_store_dword
  45   ret void
  46 }
  47
  48 ; GCN-LABEL: {{^}}singlethread_unordered:
  49 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
  50 ; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
  51 define amdgpu_kernel void @singlethread_unordered( ; kernel under test: atomic store, unordered ordering, syncscope("singlethread")
  52     i32 %in, i32* %out) {
  53 entry:
  54   store atomic i32 %in, i32* %out syncscope("singlethread") unordered, align 4 ; checks above forbid "s_waitcnt vmcnt(0)" between the label and the flat_store_dword
  55   ret void
  56 }
  57
  58 ; GCN-LABEL: {{^}}singlethread_monotonic:
  59 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
  60 ; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
  61 define amdgpu_kernel void @singlethread_monotonic( ; kernel under test: atomic store, monotonic ordering, syncscope("singlethread")
  62     i32 %in, i32* %out) {
  63 entry:
  64   store atomic i32 %in, i32* %out syncscope("singlethread") monotonic, align 4 ; checks above forbid "s_waitcnt vmcnt(0)" between the label and the flat_store_dword
  65   ret void
  66 }
  67
  68 ; GCN-LABEL: {{^}}singlethread_release:
  69 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
  70 ; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
  71 define amdgpu_kernel void @singlethread_release( ; kernel under test: atomic store, release ordering, syncscope("singlethread")
  72     i32 %in, i32* %out) {
  73 entry:
  74   store atomic i32 %in, i32* %out syncscope("singlethread") release, align 4 ; checks above forbid "s_waitcnt vmcnt(0)" between the label and the flat_store_dword, even for release
  75   ret void
  76 }
  77
  78 ; GCN-LABEL: {{^}}singlethread_seq_cst:
  79 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
  80 ; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
  81 define amdgpu_kernel void @singlethread_seq_cst( ; kernel under test: atomic store, seq_cst ordering, syncscope("singlethread")
  82     i32 %in, i32* %out) {
  83 entry:
  84   store atomic i32 %in, i32* %out syncscope("singlethread") seq_cst, align 4 ; checks above forbid "s_waitcnt vmcnt(0)" between the label and the flat_store_dword, even for seq_cst
  85   ret void
  86 }
  87
  88 ; GCN-LABEL: {{^}}agent_unordered:
  89 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
  90 ; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
  91 define amdgpu_kernel void @agent_unordered( ; kernel under test: atomic store, unordered ordering, syncscope("agent")
  92     i32 %in, i32* %out) {
  93 entry:
  94   store atomic i32 %in, i32* %out syncscope("agent") unordered, align 4 ; checks above forbid "s_waitcnt vmcnt(0)" between the label and the flat_store_dword
  95   ret void
  96 }
  97
  98 ; GCN-LABEL: {{^}}agent_monotonic:
  99 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
 100 ; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
 101 define amdgpu_kernel void @agent_monotonic( ; kernel under test: atomic store, monotonic ordering, syncscope("agent")
 102     i32 %in, i32* %out) {
 103 entry:
 104   store atomic i32 %in, i32* %out syncscope("agent") monotonic, align 4 ; checks above forbid "s_waitcnt vmcnt(0)" between the label and the flat_store_dword
 105   ret void
 106 }
 107
 108 ; GCN-LABEL: {{^}}agent_release:
 109 ; GCN:        s_waitcnt vmcnt(0){{$}}
 110 ; GCN-NEXT:   flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
 111 define amdgpu_kernel void @agent_release( ; kernel under test: atomic store, release ordering, syncscope("agent")
 112     i32 %in, i32* %out) {
 113 entry:
 114   store atomic i32 %in, i32* %out syncscope("agent") release, align 4 ; checks above require "s_waitcnt vmcnt(0)" on the line directly before the flat_store_dword
 115   ret void
 116 }
 117
 118 ; GCN-LABEL: {{^}}agent_seq_cst:
 119 ; GCN:        s_waitcnt vmcnt(0){{$}}
 120 ; GCN-NEXT:   flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
 121 define amdgpu_kernel void @agent_seq_cst( ; kernel under test: atomic store, seq_cst ordering, syncscope("agent")
 122     i32 %in, i32* %out) {
 123 entry:
 124   store atomic i32 %in, i32* %out syncscope("agent") seq_cst, align 4 ; checks above require "s_waitcnt vmcnt(0)" on the line directly before the flat_store_dword
 125   ret void
 126 }
 127
 128 ; GCN-LABEL: {{^}}workgroup_unordered:
 129 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
 130 ; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
 131 define amdgpu_kernel void @workgroup_unordered( ; kernel under test: atomic store, unordered ordering, syncscope("workgroup")
 132     i32 %in, i32* %out) {
 133 entry:
 134   store atomic i32 %in, i32* %out syncscope("workgroup") unordered, align 4 ; checks above forbid "s_waitcnt vmcnt(0)" between the label and the flat_store_dword
 135   ret void
 136 }
 137
 138 ; GCN-LABEL: {{^}}workgroup_monotonic:
 139 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
 140 ; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
 141 define amdgpu_kernel void @workgroup_monotonic( ; kernel under test: atomic store, monotonic ordering, syncscope("workgroup")
 142     i32 %in, i32* %out) {
 143 entry:
 144   store atomic i32 %in, i32* %out syncscope("workgroup") monotonic, align 4 ; checks above forbid "s_waitcnt vmcnt(0)" between the label and the flat_store_dword
 145   ret void
 146 }
 147
 148 ; GCN-LABEL: {{^}}workgroup_release:
 149 ; GFX89-NOT:  s_waitcnt vmcnt(0){{$}}
 150 ; GCN:        flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
 151 define amdgpu_kernel void @workgroup_release( ; kernel under test: atomic store, release ordering, syncscope("workgroup")
 152     i32 %in, i32* %out) {
 153 entry:
 154   store atomic i32 %in, i32* %out syncscope("workgroup") release, align 4 ; the negative check above uses the gfx8/gfx9-shared prefix: no "s_waitcnt vmcnt(0)" before the flat_store_dword
 155   ret void
 156 }
 157
 158 ; GCN-LABEL: {{^}}workgroup_seq_cst:
 159 ; GFX89-NOT:  s_waitcnt vmcnt(0){{$}}
 160 ; GCN:        flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
 161 define amdgpu_kernel void @workgroup_seq_cst( ; kernel under test: atomic store, seq_cst ordering, syncscope("workgroup")
 162     i32 %in, i32* %out) {
 163 entry:
 164   store atomic i32 %in, i32* %out syncscope("workgroup") seq_cst, align 4 ; the negative check above uses the gfx8/gfx9-shared prefix: no "s_waitcnt vmcnt(0)" before the flat_store_dword
 165   ret void
 166 }
 167
 168 ; GCN-LABEL: {{^}}wavefront_unordered:
 169 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
 170 ; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
 171 define amdgpu_kernel void @wavefront_unordered( ; kernel under test: atomic store, unordered ordering, syncscope("wavefront")
 172     i32 %in, i32* %out) {
 173 entry:
 174   store atomic i32 %in, i32* %out syncscope("wavefront") unordered, align 4 ; checks above forbid "s_waitcnt vmcnt(0)" between the label and the flat_store_dword
 175   ret void
 176 }
 177
 178 ; GCN-LABEL: {{^}}wavefront_monotonic:
 179 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
 180 ; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
 181 define amdgpu_kernel void @wavefront_monotonic( ; kernel under test: atomic store, monotonic ordering, syncscope("wavefront")
 182     i32 %in, i32* %out) {
 183 entry:
 184   store atomic i32 %in, i32* %out syncscope("wavefront") monotonic, align 4 ; checks above forbid "s_waitcnt vmcnt(0)" between the label and the flat_store_dword
 185   ret void
 186 }
 187
 188 ; GCN-LABEL: {{^}}wavefront_release:
 189 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
 190 ; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
 191 define amdgpu_kernel void @wavefront_release( ; kernel under test: atomic store, release ordering, syncscope("wavefront")
 192     i32 %in, i32* %out) {
 193 entry:
 194   store atomic i32 %in, i32* %out syncscope("wavefront") release, align 4 ; checks above forbid "s_waitcnt vmcnt(0)" between the label and the flat_store_dword, even for release
 195   ret void
 196 }
 197
 198 ; GCN-LABEL: {{^}}wavefront_seq_cst:
 199 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
 200 ; GCN:       flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
 201 define amdgpu_kernel void @wavefront_seq_cst( ; kernel under test: atomic store, seq_cst ordering, syncscope("wavefront")
 202     i32 %in, i32* %out) {
 203 entry:
 204   store atomic i32 %in, i32* %out syncscope("wavefront") seq_cst, align 4 ; checks above forbid "s_waitcnt vmcnt(0)" between the label and the flat_store_dword, even for seq_cst
 205   ret void
 206 }
 207
 208 ; GCN-LABEL: {{^}}nontemporal_private_0:
 209 ; GFX89: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
 210 define amdgpu_kernel void @nontemporal_private_0( ; kernel under test: non-atomic !nontemporal store through an addrspace(5) pointer
 211     i32* %in, i32 addrspace(5)* %out) {
 212 entry:
 213   %val = load i32, i32* %in, align 4 ; value to store is loaded from %in rather than passed directly
 214   store i32 %val, i32 addrspace(5)* %out, !nontemporal !0 ; check above expects a buffer_store_dword ... offen ending in "glc slc"
 215   ret void
 216 }
 217
 218 ; GCN-LABEL: {{^}}nontemporal_private_1:
 219 ; GFX89: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
 220 define amdgpu_kernel void @nontemporal_private_1( ; kernel under test: !nontemporal store through an addrspace(5) pointer offset by a runtime index
 221     i32* %in, i32 addrspace(5)* %out) {
 222 entry:
 223   %tid = call i32 @llvm.amdgcn.workitem.id.x() ; intrinsic result becomes the element index below
 224   %val = load i32, i32* %in, align 4
 225   %out.gep = getelementptr inbounds i32, i32 addrspace(5)* %out, i32 %tid ; address of element %tid of %out
 226   store i32 %val, i32 addrspace(5)* %out.gep, !nontemporal !0 ; check above expects a buffer_store_dword ... offen ending in "glc slc"
 227   ret void
 228 }
 229
 230 ; GCN-LABEL: {{^}}nontemporal_global_0:
 231 ; GFX8:  flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
 232 ; GFX9:  global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}}
 233 define amdgpu_kernel void @nontemporal_global_0( ; kernel under test: non-atomic !nontemporal store through an addrspace(1) pointer
 234     i32* %in, i32 addrspace(1)* %out) {
 235 entry:
 236   %val = load i32, i32* %in, align 4 ; value to store is loaded from %in rather than passed directly
 237   store i32 %val, i32 addrspace(1)* %out, !nontemporal !0 ; checks above expect flat_store_dword (gfx803 runs) or global_store_dword ..., off (gfx900 runs), both ending in "glc slc"
 238   ret void
 239 }
 240
 241 ; GCN-LABEL: {{^}}nontemporal_global_1:
 242 ; GFX8:  flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
 243 ; GFX9:  global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
 244 define amdgpu_kernel void @nontemporal_global_1( ; kernel under test: !nontemporal store through an addrspace(1) pointer offset by a runtime index
 245     i32* %in, i32 addrspace(1)* %out) {
 246 entry:
 247   %tid = call i32 @llvm.amdgcn.workitem.id.x() ; intrinsic result becomes the element index below
 248   %val = load i32, i32* %in, align 4
 249   %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid ; address of element %tid of %out
 250   store i32 %val, i32 addrspace(1)* %out.gep, !nontemporal !0 ; checks above expect flat_store_dword (gfx803 runs) or global_store_dword with an s[..] register-pair operand (gfx900 runs), both ending in "glc slc"
 251   ret void
 252 }
 253
 254 ; GCN-LABEL: {{^}}nontemporal_local_0:
 255 ; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
 256 define amdgpu_kernel void @nontemporal_local_0( ; kernel under test: non-atomic !nontemporal store through an addrspace(3) pointer
 257     i32* %in, i32 addrspace(3)* %out) {
 258 entry:
 259   %val = load i32, i32* %in, align 4 ; value to store is loaded from %in rather than passed directly
 260   store i32 %val, i32 addrspace(3)* %out, !nontemporal !0 ; check above expects ds_write_b32 with end-of-line right after its two operands (no trailing modifiers)
 261   ret void
 262 }
 263
 264 ; GCN-LABEL: {{^}}nontemporal_local_1:
 265 ; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
 266 define amdgpu_kernel void @nontemporal_local_1( ; kernel under test: !nontemporal store through an addrspace(3) pointer offset by a runtime index
 267     i32* %in, i32 addrspace(3)* %out) {
 268 entry:
 269   %tid = call i32 @llvm.amdgcn.workitem.id.x() ; intrinsic result becomes the element index below
 270   %val = load i32, i32* %in, align 4
 271   %out.gep = getelementptr inbounds i32, i32 addrspace(3)* %out, i32 %tid ; address of element %tid of %out
 272   store i32 %val, i32 addrspace(3)* %out.gep, !nontemporal !0 ; check above expects ds_write_b32 with end-of-line right after its two operands (no trailing modifiers)
 273   ret void
 274 }
 275
 276 ; GCN-LABEL: {{^}}nontemporal_flat_0:
 277 ; GFX89: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
 278 define amdgpu_kernel void @nontemporal_flat_0( ; kernel under test: non-atomic !nontemporal store through a pointer with no addrspace qualifier
 279     i32* %in, i32* %out) {
 280 entry:
 281   %val = load i32, i32* %in, align 4 ; value to store is loaded from %in rather than passed directly
 282   store i32 %val, i32* %out, !nontemporal !0 ; check above expects a flat_store_dword ending in "glc slc"
 283   ret void
 284 }
 285
 286 ; GCN-LABEL: {{^}}nontemporal_flat_1:
 287 ; GFX89: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
 288 define amdgpu_kernel void @nontemporal_flat_1( ; kernel under test: !nontemporal store through an unqualified pointer offset by a runtime index
 289     i32* %in, i32* %out) {
 290 entry:
 291   %tid = call i32 @llvm.amdgcn.workitem.id.x() ; intrinsic result becomes the element index below
 292   %val = load i32, i32* %in, align 4
 293   %out.gep = getelementptr inbounds i32, i32* %out, i32 %tid ; address of element %tid of %out
 294   store i32 %val, i32* %out.gep, !nontemporal !0 ; check above expects a flat_store_dword ending in "glc slc"
 295   ret void
 296 }
 297
 298 !0 = !{i32 1} ; metadata node referenced as "!nontemporal !0" by every store in the nontemporal_* kernels