1 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
2 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
4 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
5 ; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
7 ; Testing for ds_read_b128 (the two runs below pass -mattr=+enable-ds128 and select the CIVI checks)
8 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
9 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
; Scalar case: a single double is loaded through the addrspace(3) pointer %in
; and stored through the addrspace(3) pointer %out.
; The two GCN checks below require a ds_read_b64 into a 64-bit VGPR pair
; (captured as VAL) followed by a ds_write_b64 whose data operand is that same
; pair. The trailing {{$}} on the read pattern means nothing may follow the
; address register on that output line.
11 ; FUNC-LABEL: {{^}}local_load_f64:
15 ; GCN: ds_read_b64 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}{{$}}
16 ; GCN: ds_write_b64 v{{[0-9]+}}, [[VAL]]
20 define amdgpu_kernel void @local_load_f64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
; No explicit alignment is given on either access.
21 %ld = load double, ptr addrspace(3) %in
22 store double %ld, ptr addrspace(3) %out
; <2 x double> case: the vector is loaded from %in and stored to %out, both in
; addrspace(3), with no explicit alignment on either access (contrast with
; @local_load_v2f64_to_128, which uses align 16).
; Only the label check is visible in this view; any instruction checks for
; this function are not shown here.
26 ; FUNC-LABEL: {{^}}local_load_v2f64:
36 define amdgpu_kernel void @local_load_v2f64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
38 %ld = load <2 x double>, ptr addrspace(3) %in
39 store <2 x double> %ld, ptr addrspace(3) %out
; <3 x double> case: the vector is loaded from %in and stored to %out, both in
; addrspace(3), with no explicit alignment.
; The two DAG checks below require both a ds_read2_b64 and a ds_read_b64 to
; appear in the output, in either order.
43 ; FUNC-LABEL: {{^}}local_load_v3f64:
47 ; GCN-DAG: ds_read2_b64
48 ; GCN-DAG: ds_read_b64
56 define amdgpu_kernel void @local_load_v3f64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
58 %ld = load <3 x double>, ptr addrspace(3) %in
59 store <3 x double> %ld, ptr addrspace(3) %out
; <4 x double> case: the vector is loaded from %in and stored to %out, both in
; addrspace(3), with no explicit alignment.
; Only the label check is visible in this view; any instruction checks for
; this function are not shown here.
63 ; FUNC-LABEL: {{^}}local_load_v4f64:
79 define amdgpu_kernel void @local_load_v4f64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
81 %ld = load <4 x double>, ptr addrspace(3) %in
82 store <4 x double> %ld, ptr addrspace(3) %out
; <8 x double> case: the vector is loaded from %in and stored to %out, both in
; addrspace(3), with no explicit alignment.
; Only the label check is visible in this view; any instruction checks for
; this function are not shown here.
86 ; FUNC-LABEL: {{^}}local_load_v8f64:
111 define amdgpu_kernel void @local_load_v8f64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
113 %ld = load <8 x double>, ptr addrspace(3) %in
114 store <8 x double> %ld, ptr addrspace(3) %out
; <16 x double> case: the vector is loaded from %in and stored to %out, both in
; addrspace(3), with no explicit alignment.
; NOTE(review): the SICIV prefix used on the check below does not appear in any
; -check-prefixes list of the run lines visible in this view (those list only
; GCN, GFX9, FUNC, EG and CIVI), so as far as this view shows that check is
; never evaluated. Confirm whether a run line should enable it or whether the
; check is stale.
118 ; FUNC-LABEL: {{^}}local_load_v16f64:
119 ; SICIV: s_mov_b32 m0
170 define amdgpu_kernel void @local_load_v16f64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
172 %ld = load <16 x double>, ptr addrspace(3) %in
173 store <16 x double> %ld, ptr addrspace(3) %out
177 ; Tests if ds_read_b128 gets generated for the 16 byte aligned load.
; The CIVI check below is enabled only by the two run lines that pass
; -mattr=+enable-ds128; it requires a ds_write_b128 in the output.
; Unlike the kernels above, this definition carries no #0 attribute group.
178 ; FUNC-LABEL: {{^}}local_load_v2f64_to_128:
181 ; CIVI: ds_write_b128
187 define amdgpu_kernel void @local_load_v2f64_to_128(ptr addrspace(3) %out, ptr addrspace(3) %in) {
; Both accesses state align 16 explicitly, which is what distinguishes this
; kernel from @local_load_v2f64.
189 %ld = load <2 x double>, ptr addrspace(3) %in, align 16
190 store <2 x double> %ld, ptr addrspace(3) %out, align 16
; Attribute group #0: marks every kernel declared with #0 in this file as
; nounwind.
194 attributes #0 = { nounwind }