; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -passes=gvn -S < %s | FileCheck %s
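
; These tests cover GVN's forwarding of vector stores and loads to narrower
; scalar and vector loads at the same or offset addresses. Where forwarding is
; legal, the reloaded value is rebuilt from the available vector with
; bitcast/lshr/trunc sequences instead of re-reading memory.

; The scalar element loads at offsets 1 and 2 should be satisfied from the
; initial <4 x float> load via bitcast, shift, and truncate.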
define <4 x float> @ConvertVectors_ByRef(ptr %loc) {
; CHECK-LABEL: define <4 x float> @ConvertVectors_ByRef
; CHECK-SAME: (ptr [[LOC:%.*]]) {
; CHECK-NEXT:    [[LOAD_VEC:%.*]] = load <4 x float>, ptr [[LOC]], align 16
; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[LOAD_VEC]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [4 x float], ptr [[LOC]], i64 0, i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[LOAD_VEC]] to i128
; CHECK-NEXT:    [[TMP2:%.*]] = lshr i128 [[TMP1]], 32
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i128 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float
; CHECK-NEXT:    [[INS1:%.*]] = insertelement <4 x float> [[SHUF]], float [[TMP4]], i64 1
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [4 x float], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i128 [[TMP1]], 64
; CHECK-NEXT:    [[TMP6:%.*]] = trunc i128 [[TMP5]] to i32
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float
; CHECK-NEXT:    [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float [[TMP7]], i64 2
; CHECK-NEXT:    [[INS3:%.*]] = insertelement <4 x float> [[INS2]], float [[TMP7]], i64 3
; CHECK-NEXT:    ret <4 x float> [[INS3]]
;
  %load_vec = load <4 x float>, ptr %loc, align 16
  %shuf = shufflevector <4 x float> %load_vec, <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
  %gep1 = getelementptr inbounds [4 x float], ptr %loc, i64 0, i64 1
  %load1 = load float, ptr %gep1, align 4
  %ins1 = insertelement <4 x float> %shuf, float %load1, i64 1
  %gep2 = getelementptr inbounds [4 x float], ptr %loc, i64 0, i64 2
  %load2 = load float, ptr %gep2, align 8
  %ins2 = insertelement <4 x float> %ins1, float %load2, i64 2
  %ins3 = insertelement <4 x float> %ins2, float %load2, i64 3
  ret <4 x float> %ins3
}

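; The stored <4 x i32> should be forwarded to the i64 load of element 2 by
; bitcasting to i128, shifting right by 64, and truncating.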
define i64 @store_element_smaller_than_load(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i64 @store_element_smaller_than_load
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i128 [[TMP0]], 64
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i128 [[TMP1]] to i64
; CHECK-NEXT:    ret i64 [[TMP2]]
;
  store <4 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2
  %ref = load i64, ptr %gep
  ret i64 %ref
}

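; @f has no memory attributes and may write memory, so the store cannot be
; forwarded to the i64 load across the call; the load must remain.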
define i64 @call_before_load(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i64 @call_before_load
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    call void @f(<4 x i32> [[V]])
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT:    [[REF:%.*]] = load i64, ptr [[GEP]], align 4
; CHECK-NEXT:    ret i64 [[REF]]
;
  store <4 x i32> %v, ptr %loc
  call void @f(<4 x i32> %v)
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2
  %ref = load i64, ptr %gep
  ret i64 %ref
}

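; @f_no_mem is memory(none), so the intervening call does not block forwarding
; of the stored vector to the i64 load.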
define i64 @call_before_load_memory_none(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i64 @call_before_load_memory_none
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    call void @f_no_mem(<4 x i32> [[V]])
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i128 [[TMP0]], 64
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i128 [[TMP1]] to i64
; CHECK-NEXT:    ret i64 [[TMP2]]
;
  store <4 x i32> %v, ptr %loc
  call void @f_no_mem(<4 x i32> %v)
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2
  %ref = load i64, ptr %gep
  ret i64 %ref
}

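; A call after the load does not interfere with forwarding the store to the
; i64 load.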
define i64 @call_after_load(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i64 @call_after_load
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i128 [[TMP0]], 64
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i128 [[TMP1]] to i64
; CHECK-NEXT:    call void @f(<4 x i32> [[V]])
; CHECK-NEXT:    ret i64 [[TMP2]]
;
  store <4 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2
  %ref = load i64, ptr %gep
  call void @f(<4 x i32> %v)
  ret i64 %ref
}

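; Same as @store_element_smaller_than_load, but with a floating-point result:
; the extracted i64 bits are bitcast back to double.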
define double @store_element_smaller_than_load_float(ptr %loc, <4 x float> %v) {
; CHECK-LABEL: define double @store_element_smaller_than_load_float
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x float> [[V:%.*]]) {
; CHECK-NEXT:    store <4 x float> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x float], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[V]] to i128
; CHECK-NEXT:    [[TMP2:%.*]] = lshr i128 [[TMP1]], 64
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i128 [[TMP2]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64 [[TMP3]] to double
; CHECK-NEXT:    ret double [[TMP4]]
;
  store <4 x float> %v, ptr %loc
  %gep = getelementptr inbounds [4 x float], ptr %loc, i64 0, i64 2
  %ref = load double, ptr %gep
  ret double %ref
}

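; A scalar load covering the whole stored vector is forwarded with a single
; bitcast.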
define i64 @load_as_scalar(ptr %loc, <2 x i32> %v) {
; CHECK-LABEL: define i64 @load_as_scalar
; CHECK-SAME: (ptr [[LOC:%.*]], <2 x i32> [[V:%.*]]) {
; CHECK-NEXT:    store <2 x i32> [[V]], ptr [[LOC]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[V]] to i64
; CHECK-NEXT:    ret i64 [[TMP1]]
;
  store <2 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x float], ptr %loc, i64 0
  %ref = load i64, ptr %gep
  ret i64 %ref
}

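; The loaded scalar type (i9) is wider than a vector element (i6); the
; forwarded value is built by bitcasting the vector to i24 and truncating.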
define i9 @load_as_scalar_larger(ptr %loc, <4 x i6> %v) {
; CHECK-LABEL: define i9 @load_as_scalar_larger
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i6> [[V:%.*]]) {
; CHECK-NEXT:    store <4 x i6> [[V]], ptr [[LOC]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i6> [[V]] to i24
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i24 [[TMP1]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i16 [[TMP2]] to i9
; CHECK-NEXT:    ret i9 [[TMP3]]
;
  store <4 x i6> %v, ptr %loc
  %gep = getelementptr i9, ptr %loc, i64 0
  %ref = load i9, ptr %gep
  ret i9 %ref
}

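; As above, but the loaded scalar type (i4) is narrower than a vector
; element (i6).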
define i4 @load_as_scalar_smaller(ptr %loc, <4 x i6> %v) {
; CHECK-LABEL: define i4 @load_as_scalar_smaller
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i6> [[V:%.*]]) {
; CHECK-NEXT:    store <4 x i6> [[V]], ptr [[LOC]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i6> [[V]] to i24
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i24 [[TMP1]] to i8
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i8 [[TMP2]] to i4
; CHECK-NEXT:    ret i4 [[TMP3]]
;
  store <4 x i6> %v, ptr %loc
  %gep = getelementptr i4, ptr %loc, i64 0
  %ref = load i4, ptr %gep
  ret i4 %ref
}

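; Reloading the vector with its original type forwards the stored value
; directly; the extractelement then reads from the stored %v.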
define i32 @load_vec_same_type(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i32 @load_vec_same_type
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[V]], i32 1
; CHECK-NEXT:    ret i32 [[R]]
;
  store <4 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
  %ref = load <4 x i32>, ptr %gep
  %r = extractelement <4 x i32> %ref, i32 1
  ret i32 %r
}

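; Vector loads of the same total size but a different element type are
; forwarded through an integer bitcast of the stored vector.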
define i64 @load_vec_same_size_different_type1(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i64 @load_vec_same_size_different_type1
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[TMP0]] to <2 x i64>
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
; CHECK-NEXT:    ret i64 [[R]]
;
  store <4 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
  %ref = load <2 x i64>, ptr %gep
  %r = extractelement <2 x i64> %ref, i32 1
  ret i64 %r
}

define double @load_vec_same_size_different_type2(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define double @load_vec_same_size_different_type2
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[TMP0]] to <2 x double>
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
; CHECK-NEXT:    ret double [[R]]
;
  store <4 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
  %ref = load <2 x double>, ptr %gep
  %r = extractelement <2 x double> %ref, i32 1
  ret double %r
}

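; Loads of a narrower vector from the start of the stored vector are forwarded
; by truncating the stored bits and bitcasting to the loaded vector type.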
define i32 @load_subvector_same_type(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i32 @load_subvector_same_type
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i128 [[TMP0]] to i64
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
; CHECK-NEXT:    ret i32 [[R]]
;
  store <4 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
  %ref = load <2 x i32>, ptr %gep
  %r = extractelement <2 x i32> %ref, i32 1
  ret i32 %r
}

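; Same, but the loaded subvector has a different element type
; (<2 x i64> out of a stored <8 x i32>).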
define i64 @load_subvector_different_type(ptr %loc, <8 x i32> %v) {
; CHECK-LABEL: define i64 @load_subvector_different_type
; CHECK-SAME: (ptr [[LOC:%.*]], <8 x i32> [[V:%.*]]) {
; CHECK-NEXT:    store <8 x i32> [[V]], ptr [[LOC]], align 32
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i32> [[V]] to i256
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i256 [[TMP0]] to i128
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <2 x i64>
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
; CHECK-NEXT:    ret i64 [[R]]
;
  store <8 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
  %ref = load <2 x i64>, ptr %gep
  %r = extractelement <2 x i64> %ref, i32 1
  ret i64 %r
}

define i16 @load_subvector_different_type2(ptr %loc, <8 x i32> %v) {
; CHECK-LABEL: define i16 @load_subvector_different_type2
; CHECK-SAME: (ptr [[LOC:%.*]], <8 x i32> [[V:%.*]]) {
; CHECK-NEXT:    store <8 x i32> [[V]], ptr [[LOC]], align 32
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i32> [[V]] to i256
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i256 [[TMP0]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[TMP1]] to <2 x i16>
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i16> [[TMP2]], i32 1
; CHECK-NEXT:    ret i16 [[R]]
;
  store <8 x i32> %v, ptr %loc
  %gep = getelementptr [2 x i16], ptr %loc, i64 0
  %ref = load <2 x i16>, ptr %gep
  %r = extractelement <2 x i16> %ref, i32 1
  ret i16 %r
}

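; The remaining subvector tests load vectors with non-byte-sized element types
; (i4, i12, i6) out of a stored <8 x i8>.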
define i4 @load_subvector_different_type3(ptr %loc, <8 x i8> %v) {
; CHECK-LABEL: define i4 @load_subvector_different_type3
; CHECK-SAME: (ptr [[LOC:%.*]], <8 x i8> [[V:%.*]]) {
; CHECK-NEXT:    store <8 x i8> [[V]], ptr [[LOC]], align 8
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i16
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i16 [[TMP1]] to i12
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i12 [[TMP2]] to <3 x i4>
; CHECK-NEXT:    [[R:%.*]] = extractelement <3 x i4> [[TMP3]], i32 1
; CHECK-NEXT:    ret i4 [[R]]
;
  store <8 x i8> %v, ptr %loc
  %gep = getelementptr [3 x i4], ptr %loc, i64 0
  %ref = load <3 x i4>, ptr %gep
  %r = extractelement <3 x i4> %ref, i32 1
  ret i4 %r
}

define i12 @load_subvector_different_type4(ptr %loc, <8 x i8> %v) {
; CHECK-LABEL: define i12 @load_subvector_different_type4
; CHECK-SAME: (ptr [[LOC:%.*]], <8 x i8> [[V:%.*]]) {
; CHECK-NEXT:    store <8 x i8> [[V]], ptr [[LOC]], align 8
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i24
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i24 [[TMP1]] to <2 x i12>
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i12> [[TMP2]], i32 1
; CHECK-NEXT:    ret i12 [[R]]
;
  store <8 x i8> %v, ptr %loc
  %gep = getelementptr [2 x i12], ptr %loc, i64 0
  %ref = load <2 x i12>, ptr %gep
  %r = extractelement <2 x i12> %ref, i32 1
  ret i12 %r
}

define i6 @load_subvector_different_type5(ptr %loc, <8 x i8> %v) {
; CHECK-LABEL: define i6 @load_subvector_different_type5
; CHECK-SAME: (ptr [[LOC:%.*]], <8 x i8> [[V:%.*]]) {
; CHECK-NEXT:    store <8 x i8> [[V]], ptr [[LOC]], align 8
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i16
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i16 [[TMP1]] to i12
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i12 [[TMP2]] to <2 x i6>
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i6> [[TMP3]], i32 1
; CHECK-NEXT:    ret i6 [[R]]
;
  store <8 x i8> %v, ptr %loc
  %gep = getelementptr [2 x i6], ptr %loc, i64 0
  %ref = load <2 x i6>, ptr %gep
  %r = extractelement <2 x i6> %ref, i32 1
  ret i6 %r
}

declare void @f(<4 x i32>)
declare void @f_no_mem(<4 x i32>) memory(none)