; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer -slp-threshold=-18 < %s | FileCheck %s
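; The negative -slp-threshold lowers the profitability cutoff so the
; vectorizer also attempts trees it would normally reject as unprofitable.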

; Make sure there's no SCEV assert when the indexes are for different
; sized address spaces

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
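; In this datalayout, the LDS (3) and private (5) address spaces use 32-bit
; pointers, while the flat (0) and global (1) address spaces use 64-bit
; pointers, so GEP indices into them naturally have different widths.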
define void @slp_scev_assert(i32 %idx, i64 %tmp3) #0 {
; CHECK-LABEL: @slp_scev_assert(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = addrspacecast ptr addrspace(5) undef to ptr
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(5) undef, i32 [[IDX:%.*]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 [[TMP3:%.*]]
; CHECK-NEXT:    store i8 0, ptr addrspace(5) [[TMP2]], align 1
; CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
; CHECK-NEXT:    ret void
;
bb:
  %tmp = addrspacecast ptr addrspace(5) undef to ptr
  %tmp2 = getelementptr inbounds i8, ptr addrspace(5) undef, i32 %idx
  %tmp4 = getelementptr inbounds i8, ptr %tmp, i64 %tmp3
  store i8 0, ptr addrspace(5) %tmp2
  store i8 0, ptr %tmp4
  ret void
}
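
; A reduction-like pattern mixing loads through a 32-bit LDS pointer and a
; 64-bit flat pointer, so the two GEPs use different index widths (i32 vs i64).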
define void @multi_as_reduction_different_sized(ptr addrspace(3) %lds, i32 %idx0, i64 %idx1) #0 {
; CHECK-LABEL: @multi_as_reduction_different_sized(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(3) [[LDS:%.*]] to ptr
; CHECK-NEXT:    [[ADD0:%.*]] = add i32 [[IDX0:%.*]], 2
; CHECK-NEXT:    [[ADD1:%.*]] = add i64 [[IDX1:%.*]], 1
; CHECK-NEXT:    [[LDS_1:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[LDS]], i32 [[ADD0]]
; CHECK-NEXT:    [[FLAT_1:%.*]] = getelementptr inbounds i32, ptr [[FLAT]], i64 [[ADD1]]
; CHECK-NEXT:    [[LOAD_LDS_0:%.*]] = load i32, ptr addrspace(3) [[LDS]], align 4
; CHECK-NEXT:    [[LOAD_LDS_1:%.*]] = load i32, ptr addrspace(3) [[LDS_1]], align 4
; CHECK-NEXT:    [[LOAD_FLAT_0:%.*]] = load i32, ptr [[FLAT]], align 4
; CHECK-NEXT:    [[LOAD_FLAT_1:%.*]] = load i32, ptr [[FLAT_1]], align 4
; CHECK-NEXT:    [[SUB0:%.*]] = sub i32 [[LOAD_FLAT_0]], [[LOAD_LDS_0]]
; CHECK-NEXT:    [[SUB1:%.*]] = sub i32 [[LOAD_FLAT_1]], [[LOAD_LDS_1]]
; CHECK-NEXT:    store i32 [[SUB0]], ptr undef, align 4
; CHECK-NEXT:    store i32 [[SUB1]], ptr undef, align 4
; CHECK-NEXT:    ret void
;
bb:
  %flat = addrspacecast ptr addrspace(3) %lds to ptr
  %add0 = add i32 %idx0, 2
  %add1 = add i64 %idx1, 1

  %lds.1 = getelementptr inbounds i32, ptr addrspace(3) %lds, i32 %add0
  %flat.1 = getelementptr inbounds i32, ptr %flat, i64 %add1

  %load.lds.0 = load i32, ptr addrspace(3) %lds, align 4
  %load.lds.1 = load i32, ptr addrspace(3) %lds.1, align 4

  %load.flat.0 = load i32, ptr %flat, align 4
  %load.flat.1 = load i32, ptr %flat.1, align 4

  %sub0 = sub i32 %load.flat.0, %load.lds.0
  %sub1 = sub i32 %load.flat.1, %load.lds.1

  store i32 %sub0, ptr undef
  store i32 %sub1, ptr undef
  ret void
}

; This should vectorize if using getUnderlyingObject
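; Both the global (addrspace 1) and flat pointers are 64-bit in this
; datalayout, so both GEP indices have the same i64 width.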
define void @multi_as_reduction_same_size(ptr addrspace(1) %global, i64 %idx0, i64 %idx1) #0 {
; CHECK-LABEL: @multi_as_reduction_same_size(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(1) [[GLOBAL:%.*]] to ptr
; CHECK-NEXT:    [[ADD0:%.*]] = add i64 [[IDX0:%.*]], 2
; CHECK-NEXT:    [[ADD1:%.*]] = add i64 [[IDX1:%.*]], 1
; CHECK-NEXT:    [[GLOBAL_1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[GLOBAL]], i64 [[ADD0]]
; CHECK-NEXT:    [[FLAT_1:%.*]] = getelementptr inbounds i32, ptr [[FLAT]], i64 [[ADD1]]
; CHECK-NEXT:    [[LOAD_GLOBAL_0:%.*]] = load i32, ptr addrspace(1) [[GLOBAL]], align 4
; CHECK-NEXT:    [[LOAD_GLOBAL_1:%.*]] = load i32, ptr addrspace(1) [[GLOBAL_1]], align 4
; CHECK-NEXT:    [[LOAD_FLAT_0:%.*]] = load i32, ptr [[FLAT]], align 4
; CHECK-NEXT:    [[LOAD_FLAT_1:%.*]] = load i32, ptr [[FLAT_1]], align 4
; CHECK-NEXT:    [[SUB0:%.*]] = sub i32 [[LOAD_FLAT_0]], [[LOAD_GLOBAL_0]]
; CHECK-NEXT:    [[SUB1:%.*]] = sub i32 [[LOAD_FLAT_1]], [[LOAD_GLOBAL_1]]
; CHECK-NEXT:    store i32 [[SUB0]], ptr undef, align 4
; CHECK-NEXT:    store i32 [[SUB1]], ptr undef, align 4
; CHECK-NEXT:    ret void
;
bb:
  %flat = addrspacecast ptr addrspace(1) %global to ptr
  %add0 = add i64 %idx0, 2
  %add1 = add i64 %idx1, 1

  %global.1 = getelementptr inbounds i32, ptr addrspace(1) %global, i64 %add0
  %flat.1 = getelementptr inbounds i32, ptr %flat, i64 %add1

  %load.global.0 = load i32, ptr addrspace(1) %global, align 4
  %load.global.1 = load i32, ptr addrspace(1) %global.1, align 4

  %load.flat.0 = load i32, ptr %flat, align 4
  %load.flat.1 = load i32, ptr %flat.1, align 4

  %sub0 = sub i32 %load.flat.0, %load.global.0
  %sub1 = sub i32 %load.flat.1, %load.global.1

  store i32 %sub0, ptr undef
  store i32 %sub1, ptr undef
  ret void
}

; This should vectorize if using getUnderlyingObject
; The add is done in the same width, even though the address space size is smaller
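; Here the LDS GEP takes an i64 index even though addrspace(3) pointers are
; only 32-bit, i.e. the index is wider than the canonical index width for
; that address space.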
define void @multi_as_reduction_different_sized_noncanon(ptr addrspace(3) %lds, i64 %idx0, i64 %idx1) #0 {
; CHECK-LABEL: @multi_as_reduction_different_sized_noncanon(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(3) [[LDS:%.*]] to ptr
; CHECK-NEXT:    [[ADD0:%.*]] = add i64 [[IDX0:%.*]], 2
; CHECK-NEXT:    [[ADD1:%.*]] = add i64 [[IDX1:%.*]], 1
; CHECK-NEXT:    [[LDS_1:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[LDS]], i64 [[ADD0]]
; CHECK-NEXT:    [[FLAT_1:%.*]] = getelementptr inbounds i32, ptr [[FLAT]], i64 [[ADD1]]
; CHECK-NEXT:    [[LOAD_LDS_0:%.*]] = load i32, ptr addrspace(3) [[LDS]], align 4
; CHECK-NEXT:    [[LOAD_LDS_1:%.*]] = load i32, ptr addrspace(3) [[LDS_1]], align 4
; CHECK-NEXT:    [[LOAD_FLAT_0:%.*]] = load i32, ptr [[FLAT]], align 4
; CHECK-NEXT:    [[LOAD_FLAT_1:%.*]] = load i32, ptr [[FLAT_1]], align 4
; CHECK-NEXT:    [[SUB0:%.*]] = sub i32 [[LOAD_FLAT_0]], [[LOAD_LDS_0]]
; CHECK-NEXT:    [[SUB1:%.*]] = sub i32 [[LOAD_FLAT_1]], [[LOAD_LDS_1]]
; CHECK-NEXT:    store i32 [[SUB0]], ptr undef, align 4
; CHECK-NEXT:    store i32 [[SUB1]], ptr undef, align 4
; CHECK-NEXT:    ret void
;
bb:
  %flat = addrspacecast ptr addrspace(3) %lds to ptr
  %add0 = add i64 %idx0, 2
  %add1 = add i64 %idx1, 1

  %lds.1 = getelementptr inbounds i32, ptr addrspace(3) %lds, i64 %add0
  %flat.1 = getelementptr inbounds i32, ptr %flat, i64 %add1

  %load.lds.0 = load i32, ptr addrspace(3) %lds, align 4
  %load.lds.1 = load i32, ptr addrspace(3) %lds.1, align 4

  %load.flat.0 = load i32, ptr %flat, align 4
  %load.flat.1 = load i32, ptr %flat.1, align 4

  %sub0 = sub i32 %load.flat.0, %load.lds.0
  %sub1 = sub i32 %load.flat.1, %load.lds.1

  store i32 %sub0, ptr undef
  store i32 %sub1, ptr undef
  ret void
}
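
; Make sure SLP does not crash when the stored-through pointers are
; addrspacecasts of LDS GEPs.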
define void @slp_crash_on_addrspacecast() {
; CHECK-LABEL: @slp_crash_on_addrspacecast(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i64, ptr addrspace(3) undef, i32 undef
; CHECK-NEXT:    [[P0:%.*]] = addrspacecast ptr addrspace(3) [[TMP0]] to ptr
; CHECK-NEXT:    store i64 undef, ptr [[P0]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr addrspace(3) undef, i32 undef
; CHECK-NEXT:    [[P1:%.*]] = addrspacecast ptr addrspace(3) [[TMP1]] to ptr
; CHECK-NEXT:    store i64 undef, ptr [[P1]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %0 = getelementptr inbounds i64, ptr addrspace(3) undef, i32 undef
  %p0 = addrspacecast ptr addrspace(3) %0 to ptr
  store i64 undef, ptr %p0, align 8
  %1 = getelementptr inbounds i64, ptr addrspace(3) undef, i32 undef
  %p1 = addrspacecast ptr addrspace(3) %1 to ptr
  store i64 undef, ptr %p1, align 8
  ret void
}

attributes #0 = { nounwind }