1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
4 ; Check propagation of optional IR flags (PR20802). For a flag to
5 ; propagate from scalar instructions to their vector replacement,
6 ; *all* scalar instructions must have the flag.
8 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
9 target triple = "x86_64-unknown-unknown"
; Positive case for 'exact': all four scalar lshr instructions carry the
; flag, so the expected <4 x i32> lshr keeps 'exact'.
11 define void @exact(i32* %x) {
12 ; CHECK-LABEL: @exact(
13 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
14 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
15 ; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
16 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
17 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
18 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
19 ; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
20 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
21 ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
22 ; CHECK-NEXT: ret void
24 %idx1 = getelementptr inbounds i32, i32* %x, i64 0
25 %idx2 = getelementptr inbounds i32, i32* %x, i64 1
26 %idx3 = getelementptr inbounds i32, i32* %x, i64 2
27 %idx4 = getelementptr inbounds i32, i32* %x, i64 3
29 %load1 = load i32, i32* %idx1, align 4
30 %load2 = load i32, i32* %idx2, align 4
31 %load3 = load i32, i32* %idx3, align 4
32 %load4 = load i32, i32* %idx4, align 4
34 %op1 = lshr exact i32 %load1, 1
35 %op2 = lshr exact i32 %load2, 1
36 %op3 = lshr exact i32 %load3, 1
37 %op4 = lshr exact i32 %load4, 1
39 store i32 %op1, i32* %idx1, align 4
40 store i32 %op2, i32* %idx2, align 4
41 store i32 %op3, i32* %idx3, align 4
42 store i32 %op4, i32* %idx4, align 4
; Negative case for 'exact': %op2 is a plain lshr while the other three
; lanes are 'lshr exact', so the expected vector lshr carries no 'exact'.
47 define void @not_exact(i32* %x) {
48 ; CHECK-LABEL: @not_exact(
49 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
50 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
51 ; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
52 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
53 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
54 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
55 ; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
56 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
57 ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
58 ; CHECK-NEXT: ret void
60 %idx1 = getelementptr inbounds i32, i32* %x, i64 0
61 %idx2 = getelementptr inbounds i32, i32* %x, i64 1
62 %idx3 = getelementptr inbounds i32, i32* %x, i64 2
63 %idx4 = getelementptr inbounds i32, i32* %x, i64 3
65 %load1 = load i32, i32* %idx1, align 4
66 %load2 = load i32, i32* %idx2, align 4
67 %load3 = load i32, i32* %idx3, align 4
68 %load4 = load i32, i32* %idx4, align 4
70 %op1 = lshr exact i32 %load1, 1
71 %op2 = lshr i32 %load2, 1
72 %op3 = lshr exact i32 %load3, 1
73 %op4 = lshr exact i32 %load4, 1
75 store i32 %op1, i32* %idx1, align 4
76 store i32 %op2, i32* %idx2, align 4
77 store i32 %op3, i32* %idx3, align 4
78 store i32 %op4, i32* %idx4, align 4
; Positive case for 'nsw': all four scalar adds carry the flag, so the
; expected <4 x i32> add keeps 'nsw'.
; The label directive below anchors this function's expectations to the
; @nsw body, matching every other function in this file.
83 define void @nsw(i32* %x) {
84 ; CHECK-LABEL: @nsw(
85 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
86 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
87 ; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
88 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
89 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
90 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
91 ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
92 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
93 ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
94 ; CHECK-NEXT: ret void
96 %idx1 = getelementptr inbounds i32, i32* %x, i64 0
97 %idx2 = getelementptr inbounds i32, i32* %x, i64 1
98 %idx3 = getelementptr inbounds i32, i32* %x, i64 2
99 %idx4 = getelementptr inbounds i32, i32* %x, i64 3
101 %load1 = load i32, i32* %idx1, align 4
102 %load2 = load i32, i32* %idx2, align 4
103 %load3 = load i32, i32* %idx3, align 4
104 %load4 = load i32, i32* %idx4, align 4
106 %op1 = add nsw i32 %load1, 1
107 %op2 = add nsw i32 %load2, 1
108 %op3 = add nsw i32 %load3, 1
109 %op4 = add nsw i32 %load4, 1
111 store i32 %op1, i32* %idx1, align 4
112 store i32 %op2, i32* %idx2, align 4
113 store i32 %op3, i32* %idx3, align 4
114 store i32 %op4, i32* %idx4, align 4
; Negative case for 'nsw': %op4 is a plain add while the other three lanes
; are 'add nsw', so the expected vector add carries no 'nsw'.
119 define void @not_nsw(i32* %x) {
120 ; CHECK-LABEL: @not_nsw(
121 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
122 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
123 ; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
124 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
125 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
126 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
127 ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
128 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
129 ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
130 ; CHECK-NEXT: ret void
132 %idx1 = getelementptr inbounds i32, i32* %x, i64 0
133 %idx2 = getelementptr inbounds i32, i32* %x, i64 1
134 %idx3 = getelementptr inbounds i32, i32* %x, i64 2
135 %idx4 = getelementptr inbounds i32, i32* %x, i64 3
137 %load1 = load i32, i32* %idx1, align 4
138 %load2 = load i32, i32* %idx2, align 4
139 %load3 = load i32, i32* %idx3, align 4
140 %load4 = load i32, i32* %idx4, align 4
142 %op1 = add nsw i32 %load1, 1
143 %op2 = add nsw i32 %load2, 1
144 %op3 = add nsw i32 %load3, 1
145 %op4 = add i32 %load4, 1
147 store i32 %op1, i32* %idx1, align 4
148 store i32 %op2, i32* %idx2, align 4
149 store i32 %op3, i32* %idx3, align 4
150 store i32 %op4, i32* %idx4, align 4
; Positive case for 'nuw': all four scalar adds carry the flag, so the
; expected <4 x i32> add keeps 'nuw'.
; The label directive below anchors this function's expectations to the
; @nuw body, matching every other function in this file.
155 define void @nuw(i32* %x) {
156 ; CHECK-LABEL: @nuw(
157 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
158 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
159 ; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
160 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
161 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
162 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
163 ; CHECK-NEXT: [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
164 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
165 ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
166 ; CHECK-NEXT: ret void
168 %idx1 = getelementptr inbounds i32, i32* %x, i64 0
169 %idx2 = getelementptr inbounds i32, i32* %x, i64 1
170 %idx3 = getelementptr inbounds i32, i32* %x, i64 2
171 %idx4 = getelementptr inbounds i32, i32* %x, i64 3
173 %load1 = load i32, i32* %idx1, align 4
174 %load2 = load i32, i32* %idx2, align 4
175 %load3 = load i32, i32* %idx3, align 4
176 %load4 = load i32, i32* %idx4, align 4
178 %op1 = add nuw i32 %load1, 1
179 %op2 = add nuw i32 %load2, 1
180 %op3 = add nuw i32 %load3, 1
181 %op4 = add nuw i32 %load4, 1
183 store i32 %op1, i32* %idx1, align 4
184 store i32 %op2, i32* %idx2, align 4
185 store i32 %op3, i32* %idx3, align 4
186 store i32 %op4, i32* %idx4, align 4
; Negative case for 'nuw': %op2 and %op3 are plain adds while %op1 and %op4
; are 'add nuw', so the expected vector add carries no 'nuw'.
191 define void @not_nuw(i32* %x) {
192 ; CHECK-LABEL: @not_nuw(
193 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
194 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
195 ; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
196 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
197 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
198 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
199 ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
200 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
201 ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
202 ; CHECK-NEXT: ret void
204 %idx1 = getelementptr inbounds i32, i32* %x, i64 0
205 %idx2 = getelementptr inbounds i32, i32* %x, i64 1
206 %idx3 = getelementptr inbounds i32, i32* %x, i64 2
207 %idx4 = getelementptr inbounds i32, i32* %x, i64 3
209 %load1 = load i32, i32* %idx1, align 4
210 %load2 = load i32, i32* %idx2, align 4
211 %load3 = load i32, i32* %idx3, align 4
212 %load4 = load i32, i32* %idx4, align 4
214 %op1 = add nuw i32 %load1, 1
215 %op2 = add i32 %load2, 1
216 %op3 = add i32 %load3, 1
217 %op4 = add nuw i32 %load4, 1
219 store i32 %op1, i32* %idx1, align 4
220 store i32 %op2, i32* %idx2, align 4
221 store i32 %op3, i32* %idx3, align 4
222 store i32 %op4, i32* %idx4, align 4
; Mixed wrap flags: every scalar add carries 'nuw', but only %op2 and %op3
; also carry 'nsw'. The expected vector add keeps 'nuw' and drops 'nsw'.
227 define void @not_nsw_but_nuw(i32* %x) {
228 ; CHECK-LABEL: @not_nsw_but_nuw(
229 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
230 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
231 ; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
232 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
233 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
234 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
235 ; CHECK-NEXT: [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
236 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
237 ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
238 ; CHECK-NEXT: ret void
240 %idx1 = getelementptr inbounds i32, i32* %x, i64 0
241 %idx2 = getelementptr inbounds i32, i32* %x, i64 1
242 %idx3 = getelementptr inbounds i32, i32* %x, i64 2
243 %idx4 = getelementptr inbounds i32, i32* %x, i64 3
245 %load1 = load i32, i32* %idx1, align 4
246 %load2 = load i32, i32* %idx2, align 4
247 %load3 = load i32, i32* %idx3, align 4
248 %load4 = load i32, i32* %idx4, align 4
250 %op1 = add nuw i32 %load1, 1
251 %op2 = add nuw nsw i32 %load2, 1
252 %op3 = add nuw nsw i32 %load3, 1
253 %op4 = add nuw i32 %load4, 1
255 store i32 %op1, i32* %idx1, align 4
256 store i32 %op2, i32* %idx2, align 4
257 store i32 %op3, i32* %idx3, align 4
258 store i32 %op4, i32* %idx4, align 4
; Fast-math intersection: each scalar fadd carries 'nnan' plus a different
; extra flag ('fast', 'ninf', 'nsz', 'arcp'). 'nnan' is the only flag written
; on all four lanes, and the expected vector fadd carries only 'nnan'.
263 define void @nnan(float* %x) {
264 ; CHECK-LABEL: @nnan(
265 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 0
266 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds float, float* [[X]], i64 1
267 ; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
268 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
269 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
270 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
271 ; CHECK-NEXT: [[TMP3:%.*]] = fadd nnan <4 x float> [[TMP2]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
272 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
273 ; CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
274 ; CHECK-NEXT: ret void
276 %idx1 = getelementptr inbounds float, float* %x, i64 0
277 %idx2 = getelementptr inbounds float, float* %x, i64 1
278 %idx3 = getelementptr inbounds float, float* %x, i64 2
279 %idx4 = getelementptr inbounds float, float* %x, i64 3
281 %load1 = load float, float* %idx1, align 4
282 %load2 = load float, float* %idx2, align 4
283 %load3 = load float, float* %idx3, align 4
284 %load4 = load float, float* %idx4, align 4
286 %op1 = fadd fast nnan float %load1, 1.0
287 %op2 = fadd nnan ninf float %load2, 1.0
288 %op3 = fadd nsz nnan float %load3, 1.0
289 %op4 = fadd arcp nnan float %load4, 1.0
291 store float %op1, float* %idx1, align 4
292 store float %op2, float* %idx2, align 4
293 store float %op3, float* %idx3, align 4
294 store float %op4, float* %idx4, align 4
; Empty fast-math intersection: each scalar fadd carries a different single
; flag ('nnan', 'ninf', 'nsz', 'arcp'), so no flag is common to all lanes and
; the expected vector fadd carries no fast-math flags.
299 define void @not_nnan(float* %x) {
300 ; CHECK-LABEL: @not_nnan(
301 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 0
302 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds float, float* [[X]], i64 1
303 ; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
304 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
305 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
306 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
307 ; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP2]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
308 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
309 ; CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
310 ; CHECK-NEXT: ret void
312 %idx1 = getelementptr inbounds float, float* %x, i64 0
313 %idx2 = getelementptr inbounds float, float* %x, i64 1
314 %idx3 = getelementptr inbounds float, float* %x, i64 2
315 %idx4 = getelementptr inbounds float, float* %x, i64 3
317 %load1 = load float, float* %idx1, align 4
318 %load2 = load float, float* %idx2, align 4
319 %load3 = load float, float* %idx3, align 4
320 %load4 = load float, float* %idx4, align 4
322 %op1 = fadd nnan float %load1, 1.0
323 %op2 = fadd ninf float %load2, 1.0
324 %op3 = fadd nsz float %load3, 1.0
325 %op4 = fadd arcp float %load4, 1.0
327 store float %op1, float* %idx1, align 4
328 store float %op2, float* %idx2, align 4
329 store float %op3, float* %idx3, align 4
330 store float %op4, float* %idx4, align 4
; Every scalar fadd is written with 'fast' (alongside redundant individual
; flags in varying order); the expected vector fadd is printed as 'fadd fast'.
335 define void @only_fast(float* %x) {
336 ; CHECK-LABEL: @only_fast(
337 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 0
338 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds float, float* [[X]], i64 1
339 ; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
340 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
341 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
342 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
343 ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
344 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
345 ; CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
346 ; CHECK-NEXT: ret void
348 %idx1 = getelementptr inbounds float, float* %x, i64 0
349 %idx2 = getelementptr inbounds float, float* %x, i64 1
350 %idx3 = getelementptr inbounds float, float* %x, i64 2
351 %idx4 = getelementptr inbounds float, float* %x, i64 3
353 %load1 = load float, float* %idx1, align 4
354 %load2 = load float, float* %idx2, align 4
355 %load3 = load float, float* %idx3, align 4
356 %load4 = load float, float* %idx4, align 4
358 %op1 = fadd fast nnan float %load1, 1.0
359 %op2 = fadd fast nnan ninf float %load2, 1.0
360 %op3 = fadd fast nsz nnan float %load3, 1.0
361 %op4 = fadd arcp nnan fast float %load4, 1.0
363 store float %op1, float* %idx1, align 4
364 store float %op2, float* %idx2, align 4
365 store float %op3, float* %idx3, align 4
366 store float %op4, float* %idx4, align 4
; Three lanes are 'fadd fast' and %op4 is 'fadd arcp' only. The expected
; vector fadd carries just 'arcp' (presumably because 'fast' subsumes 'arcp',
; making it the one flag shared by all lanes -- confirm against the LangRef).
371 define void @only_arcp(float* %x) {
372 ; CHECK-LABEL: @only_arcp(
373 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 0
374 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds float, float* [[X]], i64 1
375 ; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
376 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
377 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
378 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
379 ; CHECK-NEXT: [[TMP3:%.*]] = fadd arcp <4 x float> [[TMP2]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
380 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
381 ; CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
382 ; CHECK-NEXT: ret void
384 %idx1 = getelementptr inbounds float, float* %x, i64 0
385 %idx2 = getelementptr inbounds float, float* %x, i64 1
386 %idx3 = getelementptr inbounds float, float* %x, i64 2
387 %idx4 = getelementptr inbounds float, float* %x, i64 3
389 %load1 = load float, float* %idx1, align 4
390 %load2 = load float, float* %idx2, align 4
391 %load3 = load float, float* %idx3, align 4
392 %load4 = load float, float* %idx4, align 4
394 %op1 = fadd fast float %load1, 1.0
395 %op2 = fadd fast float %load2, 1.0
396 %op3 = fadd fast float %load3, 1.0
397 %op4 = fadd arcp float %load4, 1.0
399 store float %op1, float* %idx1, align 4
400 store float %op2, float* %idx2, align 4
401 store float %op3, float* %idx3, align 4
402 store float %op4, float* %idx4, align 4
; Alternating add/sub lanes where every scalar op carries 'nsw'. The expected
; output is a vector 'add nsw' and a vector 'sub nsw' blended by a
; shufflevector that takes lanes 0 and 2 from the add and lanes 1 and 3 from
; the sub (mask <0, 5, 2, 7>).
407 define void @addsub_all_nsw(i32* %x) {
408 ; CHECK-LABEL: @addsub_all_nsw(
409 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
410 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
411 ; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
412 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
413 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
414 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
415 ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
416 ; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
417 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
418 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
419 ; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
420 ; CHECK-NEXT: ret void
422 %idx1 = getelementptr inbounds i32, i32* %x, i64 0
423 %idx2 = getelementptr inbounds i32, i32* %x, i64 1
424 %idx3 = getelementptr inbounds i32, i32* %x, i64 2
425 %idx4 = getelementptr inbounds i32, i32* %x, i64 3
427 %load1 = load i32, i32* %idx1, align 4
428 %load2 = load i32, i32* %idx2, align 4
429 %load3 = load i32, i32* %idx3, align 4
430 %load4 = load i32, i32* %idx4, align 4
432 %op1 = add nsw i32 %load1, 1
433 %op2 = sub nsw i32 %load2, 1
434 %op3 = add nsw i32 %load3, 1
435 %op4 = sub nsw i32 %load4, 1
437 store i32 %op1, i32* %idx1, align 4
438 store i32 %op2, i32* %idx2, align 4
439 store i32 %op3, i32* %idx3, align 4
440 store i32 %op4, i32* %idx4, align 4
; Alternating add/sub lanes: both scalar adds carry 'nsw', but of the two
; scalar subs only %op2 does (%op4 is a plain sub). The expected vector add
; keeps 'nsw' while the expected vector sub carries no flag.
445 define void @addsub_some_nsw(i32* %x) {
446 ; CHECK-LABEL: @addsub_some_nsw(
447 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
448 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
449 ; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
450 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
451 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
452 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
453 ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
454 ; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
455 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
456 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
457 ; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
458 ; CHECK-NEXT: ret void
460 %idx1 = getelementptr inbounds i32, i32* %x, i64 0
461 %idx2 = getelementptr inbounds i32, i32* %x, i64 1
462 %idx3 = getelementptr inbounds i32, i32* %x, i64 2
463 %idx4 = getelementptr inbounds i32, i32* %x, i64 3
465 %load1 = load i32, i32* %idx1, align 4
466 %load2 = load i32, i32* %idx2, align 4
467 %load3 = load i32, i32* %idx3, align 4
468 %load4 = load i32, i32* %idx4, align 4
470 %op1 = add nsw i32 %load1, 1
471 %op2 = sub nsw i32 %load2, 1
472 %op3 = add nsw i32 %load3, 1
473 %op4 = sub i32 %load4, 1
475 store i32 %op1, i32* %idx1, align 4
476 store i32 %op2, i32* %idx2, align 4
477 store i32 %op3, i32* %idx3, align 4
478 store i32 %op4, i32* %idx4, align 4
; Alternating add/sub lanes where each opcode has one lane without 'nsw'
; (%op1 for add, %op4 for sub). Neither the expected vector add nor the
; expected vector sub carries 'nsw'.
483 define void @addsub_no_nsw(i32* %x) {
484 ; CHECK-LABEL: @addsub_no_nsw(
485 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
486 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
487 ; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
488 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
489 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
490 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
491 ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
492 ; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
493 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
494 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
495 ; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
496 ; CHECK-NEXT: ret void
498 %idx1 = getelementptr inbounds i32, i32* %x, i64 0
499 %idx2 = getelementptr inbounds i32, i32* %x, i64 1
500 %idx3 = getelementptr inbounds i32, i32* %x, i64 2
501 %idx4 = getelementptr inbounds i32, i32* %x, i64 3
503 %load1 = load i32, i32* %idx1, align 4
504 %load2 = load i32, i32* %idx2, align 4
505 %load3 = load i32, i32* %idx3, align 4
506 %load4 = load i32, i32* %idx4, align 4
508 %op1 = add i32 %load1, 1
509 %op2 = sub nsw i32 %load2, 1
510 %op3 = add nsw i32 %load3, 1
511 %op4 = sub i32 %load4, 1
513 store i32 %op1, i32* %idx1, align 4
514 store i32 %op2, i32* %idx2, align 4
515 store i32 %op3, i32* %idx3, align 4
516 store i32 %op4, i32* %idx4, align 4
; Two-lane double pattern (fcmp oge against 0.0, negate via 'fsub -0.0, x',
; select) with attribute group #1. Both scalar fcmp and both scalar fsub are
; 'fast', and the expected vector fcmp and fsub both keep 'fast'.
521 define void @fcmp_fast(double* %x) #1 {
522 ; CHECK-LABEL: @fcmp_fast(
523 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0
524 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 1
525 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
526 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
527 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast oge <2 x double> [[TMP2]], zeroinitializer
528 ; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP2]]
529 ; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
530 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
531 ; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
532 ; CHECK-NEXT: ret void
534 %idx1 = getelementptr inbounds double, double* %x, i64 0
535 %idx2 = getelementptr inbounds double, double* %x, i64 1
537 %load1 = load double, double* %idx1, align 8
538 %load2 = load double, double* %idx2, align 8
540 %cmp1 = fcmp fast oge double %load1, 0.000000e+00
541 %cmp2 = fcmp fast oge double %load2, 0.000000e+00
543 %sub1 = fsub fast double -0.000000e+00, %load1
544 %sub2 = fsub fast double -0.000000e+00, %load2
546 %sel1 = select i1 %cmp1, double %load1, double %sub1
547 %sel2 = select i1 %cmp2, double %load2, double %sub2
549 store double %sel1, double* %idx1, align 8
550 store double %sel2, double* %idx2, align 8
; Variant of @fcmp_fast that negates with the unary 'fneg' instruction
; instead of 'fsub -0.0, x'. All scalar fcmp and fneg are 'fast', and the
; expected vector fcmp and fneg both keep 'fast'.
555 define void @fcmp_fast_unary_fneg(double* %x) #1 {
556 ; CHECK-LABEL: @fcmp_fast_unary_fneg(
557 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0
558 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 1
559 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
560 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
561 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast oge <2 x double> [[TMP2]], zeroinitializer
562 ; CHECK-NEXT: [[TMP4:%.*]] = fneg fast <2 x double> [[TMP2]]
563 ; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
564 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
565 ; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
566 ; CHECK-NEXT: ret void
568 %idx1 = getelementptr inbounds double, double* %x, i64 0
569 %idx2 = getelementptr inbounds double, double* %x, i64 1
571 %load1 = load double, double* %idx1, align 8
572 %load2 = load double, double* %idx2, align 8
574 %cmp1 = fcmp fast oge double %load1, 0.000000e+00
575 %cmp2 = fcmp fast oge double %load2, 0.000000e+00
577 %sub1 = fneg fast double %load1
578 %sub2 = fneg fast double %load2
580 %sel1 = select i1 %cmp1, double %load1, double %sub1
581 %sel2 = select i1 %cmp2, double %load2, double %sub2
583 store double %sel1, double* %idx1, align 8
584 store double %sel2, double* %idx2, align 8
; Negative counterpart of @fcmp_fast: only the first lane's fcmp and fsub
; are 'fast' (%cmp2 and %sub2 are not), so the expected vector fcmp and fsub
; carry no fast-math flags.
589 define void @fcmp_no_fast(double* %x) #1 {
590 ; CHECK-LABEL: @fcmp_no_fast(
591 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0
592 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 1
593 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
594 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
595 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp oge <2 x double> [[TMP2]], zeroinitializer
596 ; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP2]]
597 ; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
598 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
599 ; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
600 ; CHECK-NEXT: ret void
602 %idx1 = getelementptr inbounds double, double* %x, i64 0
603 %idx2 = getelementptr inbounds double, double* %x, i64 1
605 %load1 = load double, double* %idx1, align 8
606 %load2 = load double, double* %idx2, align 8
608 %cmp1 = fcmp fast oge double %load1, 0.000000e+00
609 %cmp2 = fcmp oge double %load2, 0.000000e+00
611 %sub1 = fsub fast double -0.000000e+00, %load1
612 %sub2 = fsub double -0.000000e+00, %load2
614 %sel1 = select i1 %cmp1, double %load1, double %sub1
615 %sel2 = select i1 %cmp2, double %load2, double %sub2
617 store double %sel1, double* %idx1, align 8
618 store double %sel2, double* %idx2, align 8
; Negative counterpart of @fcmp_fast_unary_fneg: only %cmp1 is 'fast'
; (%cmp2 is not) and neither scalar fneg carries any flag, so the expected
; vector fcmp and fneg carry no fast-math flags.
623 define void @fcmp_no_fast_unary_fneg(double* %x) #1 {
624 ; CHECK-LABEL: @fcmp_no_fast_unary_fneg(
625 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0
626 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 1
627 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
628 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
629 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp oge <2 x double> [[TMP2]], zeroinitializer
630 ; CHECK-NEXT: [[TMP4:%.*]] = fneg <2 x double> [[TMP2]]
631 ; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
632 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
633 ; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
634 ; CHECK-NEXT: ret void
636 %idx1 = getelementptr inbounds double, double* %x, i64 0
637 %idx2 = getelementptr inbounds double, double* %x, i64 1
639 %load1 = load double, double* %idx1, align 8
640 %load2 = load double, double* %idx2, align 8
642 %cmp1 = fcmp fast oge double %load1, 0.000000e+00
643 %cmp2 = fcmp oge double %load2, 0.000000e+00
645 %sub1 = fneg double %load1
646 %sub2 = fneg double %load2
648 %sel1 = select i1 %cmp1, double %load1, double %sub1
649 %sel2 = select i1 %cmp2, double %load2, double %sub2
651 store double %sel1, double* %idx1, align 8
652 store double %sel2, double* %idx2, align 8
; Scalar fabs intrinsic called by @call_fast and @call_no_fast below.
657 declare double @llvm.fabs.f64(double) nounwind readnone
; Fast-math flags on calls: both scalar calls to @llvm.fabs.f64 are 'fast',
; and the expected vector call to @llvm.fabs.v2f64 keeps 'fast'.
659 define void @call_fast(double* %x) {
660 ; CHECK-LABEL: @call_fast(
661 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0
662 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 1
663 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
664 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
665 ; CHECK-NEXT: [[TMP3:%.*]] = call fast <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP2]])
666 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
667 ; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
668 ; CHECK-NEXT: ret void
670 %idx1 = getelementptr inbounds double, double* %x, i64 0
671 %idx2 = getelementptr inbounds double, double* %x, i64 1
673 %load1 = load double, double* %idx1, align 8
674 %load2 = load double, double* %idx2, align 8
676 %call1 = tail call fast double @llvm.fabs.f64(double %load1) nounwind readnone
677 %call2 = tail call fast double @llvm.fabs.f64(double %load2) nounwind readnone
679 store double %call1, double* %idx1, align 8
680 store double %call2, double* %idx2, align 8
; Negative counterpart of @call_fast: only %call1 is 'fast' (%call2 is not),
; so the expected vector call to @llvm.fabs.v2f64 carries no fast-math flags.
685 define void @call_no_fast(double* %x) {
686 ; CHECK-LABEL: @call_no_fast(
687 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0
688 ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 1
689 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
690 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
691 ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP2]])
692 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
693 ; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
694 ; CHECK-NEXT: ret void
696 %idx1 = getelementptr inbounds double, double* %x, i64 0
697 %idx2 = getelementptr inbounds double, double* %x, i64 1
699 %load1 = load double, double* %idx1, align 8
700 %load2 = load double, double* %idx2, align 8
702 %call1 = tail call fast double @llvm.fabs.f64(double %load1) nounwind readnone
703 %call2 = tail call double @llvm.fabs.f64(double %load2) nounwind readnone
705 store double %call1, double* %idx1, align 8
706 store double %call2, double* %idx2, align 8
; Attribute group #1, referenced by the four @fcmp_* functions; sets the
; "target-features" string attribute to "+avx".
711 attributes #1 = { "target-features"="+avx" }