1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefix=GCN %s
3 ; Check that the vectorizer does not create slow misaligned loads or stores
5 ; GCN-LABEL: {{^}}ds1align1:
6 ; GCN-COUNT-2: ds_read_u8
7 ; GCN-COUNT-2: ds_write_b8
; Copies two adjacent i8 values between addrspace(3) pointers: reads %in[0]
; and %in[1], then writes them to %out[0] and %out[1]. Every access is
; annotated align 1; the checks preceding this kernel expect two separate
; byte reads and two separate byte writes.
8 define amdgpu_kernel void @ds1align1(ptr addrspace(3) %in, ptr addrspace(3) %out) {
9 %val1 = load i8, ptr addrspace(3) %in, align 1
; Address of the second byte (%in + 1).
10 %gep1 = getelementptr i8, ptr addrspace(3) %in, i32 1
11 %val2 = load i8, ptr addrspace(3) %gep1, align 1
12 store i8 %val1, ptr addrspace(3) %out, align 1
; Address of the second output byte (%out + 1).
13 %gep2 = getelementptr i8, ptr addrspace(3) %out, i32 1
14 store i8 %val2, ptr addrspace(3) %gep2, align 1
18 ; GCN-LABEL: {{^}}ds2align2:
19 ; GCN-COUNT-2: ds_read_u16
20 ; GCN-COUNT-2: ds_write_b16
; Copies two adjacent i16 values between addrspace(3) pointers: reads
; elements 0 and 1 of %in, then writes them to elements 0 and 1 of %out.
; Every access is annotated align 2 (the natural i16 alignment); the checks
; preceding this kernel expect two 16-bit reads and two 16-bit writes.
21 define amdgpu_kernel void @ds2align2(ptr addrspace(3) %in, ptr addrspace(3) %out) {
22 %val1 = load i16, ptr addrspace(3) %in, align 2
; Second i16 element, i.e. byte offset 2 from %in.
23 %gep1 = getelementptr i16, ptr addrspace(3) %in, i32 1
24 %val2 = load i16, ptr addrspace(3) %gep1, align 2
25 store i16 %val1, ptr addrspace(3) %out, align 2
; Second i16 element of the output, byte offset 2 from %out.
26 %gep2 = getelementptr i16, ptr addrspace(3) %out, i32 1
27 store i16 %val2, ptr addrspace(3) %gep2, align 2
31 ; GCN-LABEL: {{^}}ds4align4:
; Copies two adjacent i32 values between addrspace(3) pointers: reads
; elements 0 and 1 of %in, then writes them to elements 0 and 1 of %out.
; Every access is annotated align 4 (the natural i32 alignment).
34 define amdgpu_kernel void @ds4align4(ptr addrspace(3) %in, ptr addrspace(3) %out) {
35 %val1 = load i32, ptr addrspace(3) %in, align 4
; Second i32 element, i.e. byte offset 4 from %in.
36 %gep1 = getelementptr i32, ptr addrspace(3) %in, i32 1
37 %val2 = load i32, ptr addrspace(3) %gep1, align 4
38 store i32 %val1, ptr addrspace(3) %out, align 4
; Second i32 element of the output, byte offset 4 from %out.
39 %gep2 = getelementptr i32, ptr addrspace(3) %out, i32 1
40 store i32 %val2, ptr addrspace(3) %gep2, align 4
44 ; GCN-LABEL: {{^}}ds8align8:
; Copies two adjacent i64 values between addrspace(3) pointers: reads
; elements 0 and 1 of %in, then writes them to elements 0 and 1 of %out.
; Every access is annotated align 8 (the natural i64 alignment).
; Note: the element index here is typed i64, whereas the i8/i16/i32 kernels
; in this file use an i32 index.
47 define amdgpu_kernel void @ds8align8(ptr addrspace(3) %in, ptr addrspace(3) %out) {
48 %val1 = load i64, ptr addrspace(3) %in, align 8
; Second i64 element, i.e. byte offset 8 from %in.
49 %gep1 = getelementptr i64, ptr addrspace(3) %in, i64 1
50 %val2 = load i64, ptr addrspace(3) %gep1, align 8
51 store i64 %val1, ptr addrspace(3) %out, align 8
; Second i64 element of the output, byte offset 8 from %out.
52 %gep2 = getelementptr i64, ptr addrspace(3) %out, i64 1
53 store i64 %val2, ptr addrspace(3) %gep2, align 8
57 ; GCN-LABEL: {{^}}ds1align2:
; Same two-element i8 copy as the align-1 variant, but every access is
; annotated align 2, i.e. twice the element size.
; NOTE(review): both %in and %in + 1 (and likewise %out / %out + 1) carry
; align 2, which cannot hold for both addresses at once; presumably this is
; deliberate test input for over-aligned annotations -- confirm before
; changing.
60 define amdgpu_kernel void @ds1align2(ptr addrspace(3) %in, ptr addrspace(3) %out) {
61 %val1 = load i8, ptr addrspace(3) %in, align 2
; Address of the second byte (%in + 1).
62 %gep1 = getelementptr i8, ptr addrspace(3) %in, i32 1
63 %val2 = load i8, ptr addrspace(3) %gep1, align 2
64 store i8 %val1, ptr addrspace(3) %out, align 2
; Address of the second output byte (%out + 1).
65 %gep2 = getelementptr i8, ptr addrspace(3) %out, i32 1
66 store i8 %val2, ptr addrspace(3) %gep2, align 2
70 ; GCN-LABEL: {{^}}ds2align4:
; Same two-element i16 copy as the align-2 variant, but every access is
; annotated align 4, i.e. twice the element size.
; NOTE(review): the second element sits at byte offset 2 yet is also marked
; align 4, which cannot hold together with align 4 on the base pointer;
; presumably deliberate test input -- confirm before changing.
73 define amdgpu_kernel void @ds2align4(ptr addrspace(3) %in, ptr addrspace(3) %out) {
74 %val1 = load i16, ptr addrspace(3) %in, align 4
; Second i16 element, i.e. byte offset 2 from %in.
75 %gep1 = getelementptr i16, ptr addrspace(3) %in, i32 1
76 %val2 = load i16, ptr addrspace(3) %gep1, align 4
77 store i16 %val1, ptr addrspace(3) %out, align 4
; Second i16 element of the output, byte offset 2 from %out.
78 %gep2 = getelementptr i16, ptr addrspace(3) %out, i32 1
79 store i16 %val2, ptr addrspace(3) %gep2, align 4
83 ; GCN-LABEL: {{^}}ds4align8:
; Same two-element i32 copy as the align-4 variant, but every access is
; annotated align 8, i.e. twice the element size.
; NOTE(review): the second element sits at byte offset 4 yet is also marked
; align 8, which cannot hold together with align 8 on the base pointer;
; presumably deliberate test input -- confirm before changing.
86 define amdgpu_kernel void @ds4align8(ptr addrspace(3) %in, ptr addrspace(3) %out) {
87 %val1 = load i32, ptr addrspace(3) %in, align 8
; Second i32 element, i.e. byte offset 4 from %in.
88 %gep1 = getelementptr i32, ptr addrspace(3) %in, i32 1
89 %val2 = load i32, ptr addrspace(3) %gep1, align 8
90 store i32 %val1, ptr addrspace(3) %out, align 8
; Second i32 element of the output, byte offset 4 from %out.
91 %gep2 = getelementptr i32, ptr addrspace(3) %out, i32 1
92 store i32 %val2, ptr addrspace(3) %gep2, align 8
96 ; GCN-LABEL: {{^}}ds8align16:
; Same two-element i64 copy as the align-8 variant, but every access is
; annotated align 16, i.e. twice the element size.
; NOTE(review): the second element sits at byte offset 8 yet is also marked
; align 16, which cannot hold together with align 16 on the base pointer;
; presumably deliberate test input -- confirm before changing.
99 define amdgpu_kernel void @ds8align16(ptr addrspace(3) %in, ptr addrspace(3) %out) {
100 %val1 = load i64, ptr addrspace(3) %in, align 16
; Second i64 element, i.e. byte offset 8 from %in.
101 %gep1 = getelementptr i64, ptr addrspace(3) %in, i64 1
102 %val2 = load i64, ptr addrspace(3) %gep1, align 16
103 store i64 %val1, ptr addrspace(3) %out, align 16
; Second i64 element of the output, byte offset 8 from %out.
104 %gep2 = getelementptr i64, ptr addrspace(3) %out, i64 1
105 store i64 %val2, ptr addrspace(3) %gep2, align 16