1 ; RUN: opt -mtriple=nvptx64-nvidia-cuda -load-store-vectorizer -S -o - %s | FileCheck %s
3 ; Check that the load/store vectorizer is willing to move loads/stores across
4 ; intervening instructions only if it's safe.
6 ; - Loads can be moved across instructions that don't write or throw.
7 ; - Stores can only be moved across instructions which don't read, write, or throw.
; External callees placed between the two memory accesses in the tests below.
; The trailing #N selects an attribute group defined at the end of the file:
;   #0 nounwind, #1 nounwind writeonly, #2 nounwind readonly,
;   #3 writeonly, #4 readonly, #5 nounwind readnone.
11 declare void @fn_nounwind() #0
12 declare void @fn_nounwind_writeonly() #1
13 declare void @fn_nounwind_readonly() #2
14 declare void @fn_writeonly() #3
15 declare void @fn_readonly() #4
16 declare void @fn_readnone() #5
18 ; CHECK-LABEL: @load_fn
20 ; CHECK: call void @fn()
; Load test: reads two i32 values, one from %p (align 8) and one from %p.1,
; which is the i32 element immediately after %p (align 4).
22 define void @load_fn(i32* %p) #0 {
23 %p.1 = getelementptr i32, i32* %p, i32 1
25 %v0 = load i32, i32* %p, align 8
27 %v1 = load i32, i32* %p.1, align 4
31 ; CHECK-LABEL: @load_fn_nounwind
33 ; CHECK: call void @fn_nounwind()
; Load test: the loads from %p and %p.1 (the next i32 element) are separated
; by a call to @fn_nounwind. The call site uses attribute group #0 (nounwind),
; which places no restriction on the callee's memory accesses.
35 define void @load_fn_nounwind(i32* %p) #0 {
36 %p.1 = getelementptr i32, i32* %p, i32 1
38 %v0 = load i32, i32* %p, align 8
39 call void @fn_nounwind() #0
40 %v1 = load i32, i32* %p.1, align 4
44 ; CHECK-LABEL: @load_fn_nounwind_writeonly
46 ; CHECK: call void @fn_nounwind_writeonly()
; Load test: the loads from %p and %p.1 (the next i32 element) are separated
; by a call to @fn_nounwind_writeonly. The call site uses attribute group #1
; (nounwind writeonly), so the callee is still allowed to write memory.
48 define void @load_fn_nounwind_writeonly(i32* %p) #0 {
49 %p.1 = getelementptr i32, i32* %p, i32 1
51 %v0 = load i32, i32* %p, align 8
52 call void @fn_nounwind_writeonly() #1
53 %v1 = load i32, i32* %p.1, align 4
57 ; CHECK-LABEL: @load_fn_nounwind_readonly
58 ; CHECK-DAG: load <2 x i32>
59 ; CHECK-DAG: call void @fn_nounwind_readonly()
; Load test: the loads from %p and %p.1 (the next i32 element) are separated
; by a call to @fn_nounwind_readonly. The call site uses attribute group #2
; (nounwind readonly). The FileCheck directives above this definition expect
; the two loads to appear as a single <2 x i32> load.
60 define void @load_fn_nounwind_readonly(i32* %p) #0 {
61 %p.1 = getelementptr i32, i32* %p, i32 1
63 %v0 = load i32, i32* %p, align 8
64 call void @fn_nounwind_readonly() #2
65 %v1 = load i32, i32* %p.1, align 4
69 ; CHECK-LABEL: @load_fn_readonly
71 ; CHECK: call void @fn_readonly
; Load test: the loads from %p and %p.1 (the next i32 element) are separated
; by a call to @fn_readonly. The call site uses attribute group #4 (readonly),
; which, unlike group #2, does not include nounwind.
73 define void @load_fn_readonly(i32* %p) #0 {
74 %p.1 = getelementptr i32, i32* %p, i32 1
76 %v0 = load i32, i32* %p, align 8
77 call void @fn_readonly() #4
78 %v1 = load i32, i32* %p.1, align 4
82 ; CHECK-LABEL: @load_fn_writeonly
84 ; CHECK: call void @fn_writeonly()
; Load test: the loads from %p and %p.1 (the next i32 element) are separated
; by a call to @fn_writeonly. The call site uses attribute group #3
; (writeonly), which does not include nounwind.
86 define void @load_fn_writeonly(i32* %p) #0 {
87 %p.1 = getelementptr i32, i32* %p, i32 1
89 %v0 = load i32, i32* %p, align 8
90 call void @fn_writeonly() #3
91 %v1 = load i32, i32* %p.1, align 4
95 ; CHECK-LABEL: @load_fn_readnone
96 ; CHECK-DAG: load <2 x i32>
97 ; CHECK-DAG: call void @fn_readnone()
; Load test: the loads from %p and %p.1 (the next i32 element) are separated
; by a call to @fn_readnone. The call site uses attribute group #5
; (nounwind readnone). The FileCheck directives above this definition expect
; the two loads to appear as a single <2 x i32> load.
98 define void @load_fn_readnone(i32* %p) #0 {
99 %p.1 = getelementptr i32, i32* %p, i32 1
101 %v0 = load i32, i32* %p, align 8
102 call void @fn_readnone() #5
103 %v1 = load i32, i32* %p.1, align 4
107 ; ------------------------------------------------
108 ; Same tests, but now for stores instead of loads.
109 ; ------------------------------------------------
111 ; CHECK-LABEL: @store_fn
113 ; CHECK: call void @fn()
; Store test: writes i32 0 to %p.1, the i32 element immediately after %p.
; No explicit alignment is given on this store.
115 define void @store_fn(i32* %p) #0 {
116 %p.1 = getelementptr i32, i32* %p, i32 1
120 store i32 0, i32* %p.1
124 ; CHECK-LABEL: @store_fn_nounwind
126 ; CHECK: call void @fn_nounwind()
; Store test: a call to @fn_nounwind (call-site attribute group #0, nounwind)
; precedes the store of i32 0 to %p.1, the i32 element after %p.
128 define void @store_fn_nounwind(i32* %p) #0 {
129 %p.1 = getelementptr i32, i32* %p, i32 1
132 call void @fn_nounwind() #0
133 store i32 0, i32* %p.1
137 ; CHECK-LABEL: @store_fn_nounwind_writeonly
139 ; CHECK: call void @fn_nounwind_writeonly()
; Store test: a call to @fn_nounwind_writeonly (call-site attribute group #1,
; nounwind writeonly) precedes the store of i32 0 to %p.1, the i32 element
; after %p.
141 define void @store_fn_nounwind_writeonly(i32* %p) #0 {
142 %p.1 = getelementptr i32, i32* %p, i32 1
145 call void @fn_nounwind_writeonly() #1
146 store i32 0, i32* %p.1
150 ; CHECK-LABEL: @store_fn_nounwind_readonly
152 ; CHECK: call void @fn_nounwind_readonly()
; Store test: a call to @fn_nounwind_readonly (call-site attribute group #2,
; nounwind readonly) precedes the store of i32 0 to %p.1, the i32 element
; after %p. The callee may still read memory.
154 define void @store_fn_nounwind_readonly(i32* %p) #0 {
155 %p.1 = getelementptr i32, i32* %p, i32 1
158 call void @fn_nounwind_readonly() #2
159 store i32 0, i32* %p.1
163 ; CHECK-LABEL: @store_fn_readonly
165 ; CHECK: call void @fn_readonly
; Store test: a call to @fn_readonly (call-site attribute group #4, readonly,
; without nounwind) precedes the store of i32 0 to %p.1, the i32 element
; after %p.
167 define void @store_fn_readonly(i32* %p) #0 {
168 %p.1 = getelementptr i32, i32* %p, i32 1
171 call void @fn_readonly() #4
172 store i32 0, i32* %p.1
176 ; CHECK-LABEL: @store_fn_writeonly
178 ; CHECK: call void @fn_writeonly()
; Store test: a call to @fn_writeonly (call-site attribute group #3, writeonly,
; without nounwind) precedes the store of i32 0 to %p.1, the i32 element
; after %p.
180 define void @store_fn_writeonly(i32* %p) #0 {
181 %p.1 = getelementptr i32, i32* %p, i32 1
184 call void @fn_writeonly() #3
185 store i32 0, i32* %p.1
189 ; This is the only store idiom we can vectorize.
190 ; CHECK-LABEL: @store_fn_readnone
191 ; CHECK-DAG: store <2 x i32>
192 ; CHECK-DAG: call void @fn_readnone()
; Store test: stores of i32 0 to %p and to %p.1 (the next i32 element) are
; separated by a call to @fn_readnone (call-site attribute group #5,
; nounwind readnone). The FileCheck directives above this definition expect
; the two stores to appear as a single <2 x i32> store.
; NOTE(review): %p.1 is %p plus one i32, yet both stores claim align 8, while
; the load tests use align 4 for %p.1 - confirm this alignment is intentional.
193 define void @store_fn_readnone(i32* %p) #0 {
194 %p.1 = getelementptr i32, i32* %p, i32 1
196 store i32 0, i32* %p, align 8
197 call void @fn_readnone() #5
198 store i32 0, i32* %p.1, align 8
; Attribute groups referenced by the declarations, function definitions, and
; call sites above. Groups #1/#3 and #2/#4 differ only in whether nounwind is
; present.
203 attributes #0 = { nounwind }
204 attributes #1 = { nounwind writeonly }
205 attributes #2 = { nounwind readonly }
206 attributes #3 = { writeonly }
207 attributes #4 = { readonly }
208 ; readnone implies nounwind, so no need to test separately
209 attributes #5 = { nounwind readnone }