; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s

; Check propagation of optional IR flags (PR20802). For a flag to
; propagate from scalar instructions to their vector replacement,
; *all* scalar instructions must have the flag.

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
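
; All four scalar lshr instructions have 'exact', so the vectorized
; lshr keeps the flag.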
define void @exact(i32* %x) {
; CHECK-LABEL: @exact(
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
  %idx4 = getelementptr inbounds i32, i32* %x, i64 3

  %load1 = load i32, i32* %idx1, align 4
  %load2 = load i32, i32* %idx2, align 4
  %load3 = load i32, i32* %idx3, align 4
  %load4 = load i32, i32* %idx4, align 4

  %op1 = lshr exact i32 %load1, 1
  %op2 = lshr exact i32 %load2, 1
  %op3 = lshr exact i32 %load3, 1
  %op4 = lshr exact i32 %load4, 1

  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}
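
; The second scalar lshr is missing 'exact', so the flag must be
; dropped from the vectorized lshr.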
define void @not_exact(i32* %x) {
; CHECK-LABEL: @not_exact(
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
  %idx4 = getelementptr inbounds i32, i32* %x, i64 3

  %load1 = load i32, i32* %idx1, align 4
  %load2 = load i32, i32* %idx2, align 4
  %load3 = load i32, i32* %idx3, align 4
  %load4 = load i32, i32* %idx4, align 4

  %op1 = lshr exact i32 %load1, 1
  %op2 = lshr i32 %load2, 1
  %op3 = lshr exact i32 %load3, 1
  %op4 = lshr exact i32 %load4, 1

  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}
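
; All four scalar adds have 'nsw', so the vectorized add keeps it.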
define void @nsw(i32* %x) {
; CHECK-LABEL: @nsw(
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
  %idx4 = getelementptr inbounds i32, i32* %x, i64 3

  %load1 = load i32, i32* %idx1, align 4
  %load2 = load i32, i32* %idx2, align 4
  %load3 = load i32, i32* %idx3, align 4
  %load4 = load i32, i32* %idx4, align 4

  %op1 = add nsw i32 %load1, 1
  %op2 = add nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = add nsw i32 %load4, 1

  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}
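
; The last scalar add is missing 'nsw', so the vectorized add
; must not have it.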
define void @not_nsw(i32* %x) {
; CHECK-LABEL: @not_nsw(
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
  %idx4 = getelementptr inbounds i32, i32* %x, i64 3

  %load1 = load i32, i32* %idx1, align 4
  %load2 = load i32, i32* %idx2, align 4
  %load3 = load i32, i32* %idx3, align 4
  %load4 = load i32, i32* %idx4, align 4

  %op1 = add nsw i32 %load1, 1
  %op2 = add nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = add i32 %load4, 1

  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}
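
; All four scalar adds have 'nuw', so the vectorized add keeps it.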
define void @nuw(i32* %x) {
; CHECK-LABEL: @nuw(
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = add nuw <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
  %idx4 = getelementptr inbounds i32, i32* %x, i64 3

  %load1 = load i32, i32* %idx1, align 4
  %load2 = load i32, i32* %idx2, align 4
  %load3 = load i32, i32* %idx3, align 4
  %load4 = load i32, i32* %idx4, align 4

  %op1 = add nuw i32 %load1, 1
  %op2 = add nuw i32 %load2, 1
  %op3 = add nuw i32 %load3, 1
  %op4 = add nuw i32 %load4, 1

  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}
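
; Two of the scalar adds are missing 'nuw', so the vectorized add
; must not have it.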
define void @not_nuw(i32* %x) {
; CHECK-LABEL: @not_nuw(
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
  %idx4 = getelementptr inbounds i32, i32* %x, i64 3

  %load1 = load i32, i32* %idx1, align 4
  %load2 = load i32, i32* %idx2, align 4
  %load3 = load i32, i32* %idx3, align 4
  %load4 = load i32, i32* %idx4, align 4

  %op1 = add nuw i32 %load1, 1
  %op2 = add i32 %load2, 1
  %op3 = add i32 %load3, 1
  %op4 = add nuw i32 %load4, 1

  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}
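
; 'nnan' is the only fast-math flag common to all four scalar fadds
; ('fast' implies it), so it is the only flag on the vectorized fadd.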
define void @nnan(float* %x) {
; CHECK-LABEL: @nnan(
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 0
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds float, float* [[X]], i64 1
; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fadd nnan <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
; CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
  %idx1 = getelementptr inbounds float, float* %x, i64 0
  %idx2 = getelementptr inbounds float, float* %x, i64 1
  %idx3 = getelementptr inbounds float, float* %x, i64 2
  %idx4 = getelementptr inbounds float, float* %x, i64 3

  %load1 = load float, float* %idx1, align 4
  %load2 = load float, float* %idx2, align 4
  %load3 = load float, float* %idx3, align 4
  %load4 = load float, float* %idx4, align 4

  %op1 = fadd fast nnan float %load1, 1.0
  %op2 = fadd nnan ninf float %load2, 1.0
  %op3 = fadd nsz nnan float %load3, 1.0
  %op4 = fadd arcp nnan float %load4, 1.0

  store float %op1, float* %idx1, align 4
  store float %op2, float* %idx2, align 4
  store float %op3, float* %idx3, align 4
  store float %op4, float* %idx4, align 4

  ret void
}
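
; Each scalar fadd carries a different fast-math flag; no flag is
; common to all four, so the vectorized fadd has none.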
define void @not_nnan(float* %x) {
; CHECK-LABEL: @not_nnan(
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 0
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds float, float* [[X]], i64 1
; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
; CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
  %idx1 = getelementptr inbounds float, float* %x, i64 0
  %idx2 = getelementptr inbounds float, float* %x, i64 1
  %idx3 = getelementptr inbounds float, float* %x, i64 2
  %idx4 = getelementptr inbounds float, float* %x, i64 3

  %load1 = load float, float* %idx1, align 4
  %load2 = load float, float* %idx2, align 4
  %load3 = load float, float* %idx3, align 4
  %load4 = load float, float* %idx4, align 4

  %op1 = fadd nnan float %load1, 1.0
  %op2 = fadd ninf float %load2, 1.0
  %op3 = fadd nsz float %load3, 1.0
  %op4 = fadd arcp float %load4, 1.0

  store float %op1, float* %idx1, align 4
  store float %op2, float* %idx2, align 4
  store float %op3, float* %idx3, align 4
  store float %op4, float* %idx4, align 4

  ret void
}
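
; All four scalar fadds have 'fast', so the vectorized fadd keeps it.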
define void @only_fast(float* %x) {
; CHECK-LABEL: @only_fast(
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 0
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds float, float* [[X]], i64 1
; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
; CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
  %idx1 = getelementptr inbounds float, float* %x, i64 0
  %idx2 = getelementptr inbounds float, float* %x, i64 1
  %idx3 = getelementptr inbounds float, float* %x, i64 2
  %idx4 = getelementptr inbounds float, float* %x, i64 3

  %load1 = load float, float* %idx1, align 4
  %load2 = load float, float* %idx2, align 4
  %load3 = load float, float* %idx3, align 4
  %load4 = load float, float* %idx4, align 4

  %op1 = fadd fast nnan float %load1, 1.0
  %op2 = fadd fast nnan ninf float %load2, 1.0
  %op3 = fadd fast nsz nnan float %load3, 1.0
  %op4 = fadd arcp nnan fast float %load4, 1.0

  store float %op1, float* %idx1, align 4
  store float %op2, float* %idx2, align 4
  store float %op3, float* %idx3, align 4
  store float %op4, float* %idx4, align 4

  ret void
}
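
; The last scalar fadd has only 'arcp'. Since 'fast' implies 'arcp',
; that is the one flag common to all four, and the only one kept on
; the vectorized fadd.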
define void @only_arcp(float* %x) {
; CHECK-LABEL: @only_arcp(
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 0
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds float, float* [[X]], i64 1
; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fadd arcp <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
; CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
  %idx1 = getelementptr inbounds float, float* %x, i64 0
  %idx2 = getelementptr inbounds float, float* %x, i64 1
  %idx3 = getelementptr inbounds float, float* %x, i64 2
  %idx4 = getelementptr inbounds float, float* %x, i64 3

  %load1 = load float, float* %idx1, align 4
  %load2 = load float, float* %idx2, align 4
  %load3 = load float, float* %idx3, align 4
  %load4 = load float, float* %idx4, align 4

  %op1 = fadd fast float %load1, 1.0
  %op2 = fadd fast float %load2, 1.0
  %op3 = fadd fast float %load3, 1.0
  %op4 = fadd arcp float %load4, 1.0

  store float %op1, float* %idx1, align 4
  store float %op2, float* %idx2, align 4
  store float %op3, float* %idx3, align 4
  store float %op4, float* %idx4, align 4

  ret void
}
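
; Alternating add/sub where every scalar op has 'nsw': both the
; vectorized add and the vectorized sub keep the flag.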
define void @addsub_all_nsw(i32* %x) {
; CHECK-LABEL: @addsub_all_nsw(
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
; CHECK-NEXT: ret void
;
  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
  %idx4 = getelementptr inbounds i32, i32* %x, i64 3

  %load1 = load i32, i32* %idx1, align 4
  %load2 = load i32, i32* %idx2, align 4
  %load3 = load i32, i32* %idx3, align 4
  %load4 = load i32, i32* %idx4, align 4

  %op1 = add nsw i32 %load1, 1
  %op2 = sub nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = sub nsw i32 %load4, 1

  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}
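
; Alternating add/sub where the last sub is missing 'nsw': the
; vectorized add keeps the flag, but the vectorized sub drops it.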
define void @addsub_some_nsw(i32* %x) {
; CHECK-LABEL: @addsub_some_nsw(
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
; CHECK-NEXT: ret void
;
  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
  %idx4 = getelementptr inbounds i32, i32* %x, i64 3

  %load1 = load i32, i32* %idx1, align 4
  %load2 = load i32, i32* %idx2, align 4
  %load3 = load i32, i32* %idx3, align 4
  %load4 = load i32, i32* %idx4, align 4

  %op1 = add nsw i32 %load1, 1
  %op2 = sub nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = sub i32 %load4, 1

  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}
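
; Alternating add/sub where one add and one sub are missing 'nsw':
; neither vectorized op gets the flag.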
define void @addsub_no_nsw(i32* %x) {
; CHECK-LABEL: @addsub_no_nsw(
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
; CHECK-NEXT: ret void
;
  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
  %idx4 = getelementptr inbounds i32, i32* %x, i64 3

  %load1 = load i32, i32* %idx1, align 4
  %load2 = load i32, i32* %idx2, align 4
  %load3 = load i32, i32* %idx3, align 4
  %load4 = load i32, i32* %idx4, align 4

  %op1 = add i32 %load1, 1
  %op2 = sub nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = sub i32 %load4, 1

  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}
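
; Both scalar fcmps and both scalar fsubs are 'fast', so the
; vectorized fcmp and fsub keep the flag.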
define void @fcmp_fast(double* %x) #1 {
; CHECK-LABEL: @fcmp_fast(
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast oge <2 x double> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
; CHECK-NEXT: ret void
;
  %idx1 = getelementptr inbounds double, double* %x, i64 0
  %idx2 = getelementptr inbounds double, double* %x, i64 1

  %load1 = load double, double* %idx1, align 8
  %load2 = load double, double* %idx2, align 8

  %cmp1 = fcmp fast oge double %load1, 0.000000e+00
  %cmp2 = fcmp fast oge double %load2, 0.000000e+00

  %sub1 = fsub fast double -0.000000e+00, %load1
  %sub2 = fsub fast double -0.000000e+00, %load2

  %sel1 = select i1 %cmp1, double %load1, double %sub1
  %sel2 = select i1 %cmp2, double %load2, double %sub2

  store double %sel1, double* %idx1, align 8
  store double %sel2, double* %idx2, align 8

  ret void
}
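
; One fcmp and one fsub are missing 'fast', so the vectorized fcmp
; and fsub must not have it.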
define void @fcmp_no_fast(double* %x) #1 {
; CHECK-LABEL: @fcmp_no_fast(
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = fcmp oge <2 x double> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
; CHECK-NEXT: ret void
;
  %idx1 = getelementptr inbounds double, double* %x, i64 0
  %idx2 = getelementptr inbounds double, double* %x, i64 1

  %load1 = load double, double* %idx1, align 8
  %load2 = load double, double* %idx2, align 8

  %cmp1 = fcmp fast oge double %load1, 0.000000e+00
  %cmp2 = fcmp oge double %load2, 0.000000e+00

  %sub1 = fsub fast double -0.000000e+00, %load1
  %sub2 = fsub double -0.000000e+00, %load2

  %sel1 = select i1 %cmp1, double %load1, double %sub1
  %sel2 = select i1 %cmp2, double %load2, double %sub2

  store double %sel1, double* %idx1, align 8
  store double %sel2, double* %idx2, align 8

  ret void
}

declare double @llvm.fabs.f64(double) nounwind readnone
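
; Both scalar fabs calls are 'fast', so the vectorized call to the
; vector fabs intrinsic keeps the flag.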
define void @call_fast(double* %x) {
; CHECK-LABEL: @call_fast(
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = call fast <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT: ret void
;
  %idx1 = getelementptr inbounds double, double* %x, i64 0
  %idx2 = getelementptr inbounds double, double* %x, i64 1

  %load1 = load double, double* %idx1, align 8
  %load2 = load double, double* %idx2, align 8

  %call1 = tail call fast double @llvm.fabs.f64(double %load1) nounwind readnone
  %call2 = tail call fast double @llvm.fabs.f64(double %load2) nounwind readnone

  store double %call1, double* %idx1, align 8
  store double %call2, double* %idx2, align 8

  ret void
}
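
; The second fabs call is missing 'fast', so the vectorized call
; must not have it.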
define void @call_no_fast(double* %x) {
; CHECK-LABEL: @call_no_fast(
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT: ret void
;
  %idx1 = getelementptr inbounds double, double* %x, i64 0
  %idx2 = getelementptr inbounds double, double* %x, i64 1

  %load1 = load double, double* %idx1, align 8
  %load2 = load double, double* %idx2, align 8

  %call1 = tail call fast double @llvm.fabs.f64(double %load1) nounwind readnone
  %call2 = tail call double @llvm.fabs.f64(double %load2) nounwind readnone

  store double %call1, double* %idx1, align 8
  store double %call2, double* %idx2, align 8

  ret void
}

attributes #1 = { "target-features"="+avx" }