; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -S | FileCheck %s

; Check propagation of optional IR flags (PR20802). For a flag to
; propagate from scalar instructions to their vector replacement,
; *all* scalar instructions must have the flag.

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
; All four scalar lshr ops are 'exact', so the vectorized lshr keeps 'exact'.
define void @exact(ptr %x) {
; CHECK-LABEL: @exact(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = lshr exact i32 %load1, 1
  %op2 = lshr exact i32 %load2, 1
  %op3 = lshr exact i32 %load3, 1
  %op4 = lshr exact i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}
; One scalar lshr (%op2) lacks 'exact', so the vector lshr must drop it.
define void @not_exact(ptr %x) {
; CHECK-LABEL: @not_exact(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = lshr exact i32 %load1, 1
  %op2 = lshr i32 %load2, 1
  %op3 = lshr exact i32 %load3, 1
  %op4 = lshr exact i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}
; All four scalar adds are 'nsw', so the vector add keeps 'nsw'.
define void @nsw(ptr %x) {
; CHECK-LABEL: @nsw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add nsw i32 %load1, 1
  %op2 = add nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = add nsw i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}
; One scalar add (%op4) lacks 'nsw', so the vector add must drop it.
define void @not_nsw(ptr %x) {
; CHECK-LABEL: @not_nsw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add nsw i32 %load1, 1
  %op2 = add nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = add i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}
; All four scalar adds are 'nuw', so the vector add keeps 'nuw'.
define void @nuw(ptr %x) {
; CHECK-LABEL: @nuw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add nuw i32 %load1, 1
  %op2 = add nuw i32 %load2, 1
  %op3 = add nuw i32 %load3, 1
  %op4 = add nuw i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}
; Two scalar adds (%op2, %op3) lack 'nuw', so the vector add must drop it.
define void @not_nuw(ptr %x) {
; CHECK-LABEL: @not_nuw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add nuw i32 %load1, 1
  %op2 = add i32 %load2, 1
  %op3 = add i32 %load3, 1
  %op4 = add nuw i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}
; 'nuw' is on every lane but 'nsw' is not, so only 'nuw' propagates.
define void @not_nsw_but_nuw(ptr %x) {
; CHECK-LABEL: @not_nsw_but_nuw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add nuw i32 %load1, 1
  %op2 = add nuw nsw i32 %load2, 1
  %op3 = add nuw nsw i32 %load3, 1
  %op4 = add nuw i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}
; 'nnan' is the only fast-math flag common to all four lanes, so only it propagates.
define void @nnan(ptr %x) {
; CHECK-LABEL: @nnan(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fadd nnan <4 x float> [[TMP2]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds float, ptr %x, i64 1
  %idx3 = getelementptr inbounds float, ptr %x, i64 2
  %idx4 = getelementptr inbounds float, ptr %x, i64 3

  %load1 = load float, ptr %x, align 4
  %load2 = load float, ptr %idx2, align 4
  %load3 = load float, ptr %idx3, align 4
  %load4 = load float, ptr %idx4, align 4

  %op1 = fadd fast nnan float %load1, 1.0
  %op2 = fadd nnan ninf float %load2, 1.0
  %op3 = fadd nsz nnan float %load3, 1.0
  %op4 = fadd arcp nnan float %load4, 1.0

  store float %op1, ptr %x, align 4
  store float %op2, ptr %idx2, align 4
  store float %op3, ptr %idx3, align 4
  store float %op4, ptr %idx4, align 4

  ret void
}
; No fast-math flag is shared by all four lanes, so the vector fadd has none.
define void @not_nnan(ptr %x) {
; CHECK-LABEL: @not_nnan(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP2]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds float, ptr %x, i64 1
  %idx3 = getelementptr inbounds float, ptr %x, i64 2
  %idx4 = getelementptr inbounds float, ptr %x, i64 3

  %load1 = load float, ptr %x, align 4
  %load2 = load float, ptr %idx2, align 4
  %load3 = load float, ptr %idx3, align 4
  %load4 = load float, ptr %idx4, align 4

  %op1 = fadd nnan float %load1, 1.0
  %op2 = fadd ninf float %load2, 1.0
  %op3 = fadd nsz float %load3, 1.0
  %op4 = fadd arcp float %load4, 1.0

  store float %op1, ptr %x, align 4
  store float %op2, ptr %idx2, align 4
  store float %op3, ptr %idx3, align 4
  store float %op4, ptr %idx4, align 4

  ret void
}
; 'fast' appears on every lane, so the vector fadd keeps 'fast'.
define void @only_fast(ptr %x) {
; CHECK-LABEL: @only_fast(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds float, ptr %x, i64 1
  %idx3 = getelementptr inbounds float, ptr %x, i64 2
  %idx4 = getelementptr inbounds float, ptr %x, i64 3

  %load1 = load float, ptr %x, align 4
  %load2 = load float, ptr %idx2, align 4
  %load3 = load float, ptr %idx3, align 4
  %load4 = load float, ptr %idx4, align 4

  %op1 = fadd fast nnan float %load1, 1.0
  %op2 = fadd fast nnan ninf float %load2, 1.0
  %op3 = fadd fast nsz nnan float %load3, 1.0
  %op4 = fadd arcp nnan fast float %load4, 1.0

  store float %op1, ptr %x, align 4
  store float %op2, ptr %idx2, align 4
  store float %op3, ptr %idx3, align 4
  store float %op4, ptr %idx4, align 4

  ret void
}
; Only 'arcp' (implied by 'fast' on lanes 1-3) is common to all lanes.
define void @only_arcp(ptr %x) {
; CHECK-LABEL: @only_arcp(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fadd arcp <4 x float> [[TMP2]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds float, ptr %x, i64 1
  %idx3 = getelementptr inbounds float, ptr %x, i64 2
  %idx4 = getelementptr inbounds float, ptr %x, i64 3

  %load1 = load float, ptr %x, align 4
  %load2 = load float, ptr %idx2, align 4
  %load3 = load float, ptr %idx3, align 4
  %load4 = load float, ptr %idx4, align 4

  %op1 = fadd fast float %load1, 1.0
  %op2 = fadd fast float %load2, 1.0
  %op3 = fadd fast float %load3, 1.0
  %op4 = fadd arcp float %load4, 1.0

  store float %op1, ptr %x, align 4
  store float %op2, ptr %idx2, align 4
  store float %op3, ptr %idx3, align 4
  store float %op4, ptr %idx4, align 4

  ret void
}
; Alternating add/sub, all 'nsw': both vector ops keep 'nsw'.
define void @addsub_all_nsw(ptr %x) {
; CHECK-LABEL: @addsub_all_nsw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add nsw i32 %load1, 1
  %op2 = sub nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = sub nsw i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}
; Alternating add/sub: both adds are 'nsw' but one sub is not, so only the
; vector add keeps 'nsw'.
define void @addsub_some_nsw(ptr %x) {
; CHECK-LABEL: @addsub_some_nsw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add nsw i32 %load1, 1
  %op2 = sub nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = sub i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}
; Alternating add/sub: one add and one sub lack 'nsw', so neither vector op
; keeps it.
define void @addsub_no_nsw(ptr %x) {
; CHECK-LABEL: @addsub_no_nsw(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[X]], align 4
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
  %idx4 = getelementptr inbounds i32, ptr %x, i64 3

  %load1 = load i32, ptr %x, align 4
  %load2 = load i32, ptr %idx2, align 4
  %load3 = load i32, ptr %idx3, align 4
  %load4 = load i32, ptr %idx4, align 4

  %op1 = add i32 %load1, 1
  %op2 = sub nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = sub i32 %load4, 1

  store i32 %op1, ptr %x, align 4
  store i32 %op2, ptr %idx2, align 4
  store i32 %op3, ptr %idx3, align 4
  store i32 %op4, ptr %idx4, align 4

  ret void
}
; Both fcmp and both fsub are 'fast', so the vector fcmp/fsub keep 'fast'.
define void @fcmp_fast(ptr %x) #1 {
; CHECK-LABEL: @fcmp_fast(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast oge <2 x double> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP2]]
; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[X]], align 8
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds double, ptr %x, i64 1

  %load1 = load double, ptr %x, align 8
  %load2 = load double, ptr %idx2, align 8

  %cmp1 = fcmp fast oge double %load1, 0.000000e+00
  %cmp2 = fcmp fast oge double %load2, 0.000000e+00

  %sub1 = fsub fast double -0.000000e+00, %load1
  %sub2 = fsub fast double -0.000000e+00, %load2

  %sel1 = select i1 %cmp1, double %load1, double %sub1
  %sel2 = select i1 %cmp2, double %load2, double %sub2

  store double %sel1, ptr %x, align 8
  store double %sel2, ptr %idx2, align 8

  ret void
}
; Same as @fcmp_fast but using the unary fneg instruction instead of fsub.
define void @fcmp_fast_unary_fneg(ptr %x) #1 {
; CHECK-LABEL: @fcmp_fast_unary_fneg(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast oge <2 x double> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fneg fast <2 x double> [[TMP2]]
; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[X]], align 8
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds double, ptr %x, i64 1

  %load1 = load double, ptr %x, align 8
  %load2 = load double, ptr %idx2, align 8

  %cmp1 = fcmp fast oge double %load1, 0.000000e+00
  %cmp2 = fcmp fast oge double %load2, 0.000000e+00

  %sub1 = fneg fast double %load1
  %sub2 = fneg fast double %load2

  %sel1 = select i1 %cmp1, double %load1, double %sub1
  %sel2 = select i1 %cmp2, double %load2, double %sub2

  store double %sel1, ptr %x, align 8
  store double %sel2, ptr %idx2, align 8

  ret void
}
; Only one lane of each fcmp/fsub pair is 'fast', so the flags must be dropped.
define void @fcmp_no_fast(ptr %x) #1 {
; CHECK-LABEL: @fcmp_no_fast(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp oge <2 x double> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP2]]
; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[X]], align 8
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds double, ptr %x, i64 1

  %load1 = load double, ptr %x, align 8
  %load2 = load double, ptr %idx2, align 8

  %cmp1 = fcmp fast oge double %load1, 0.000000e+00
  %cmp2 = fcmp oge double %load2, 0.000000e+00

  %sub1 = fsub fast double -0.000000e+00, %load1
  %sub2 = fsub double -0.000000e+00, %load2

  %sel1 = select i1 %cmp1, double %load1, double %sub1
  %sel2 = select i1 %cmp2, double %load2, double %sub2

  store double %sel1, ptr %x, align 8
  store double %sel2, ptr %idx2, align 8

  ret void
}
; Same as @fcmp_no_fast but using the unary fneg instruction instead of fsub.
define void @fcmp_no_fast_unary_fneg(ptr %x) #1 {
; CHECK-LABEL: @fcmp_no_fast_unary_fneg(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp oge <2 x double> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fneg <2 x double> [[TMP2]]
; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[X]], align 8
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds double, ptr %x, i64 1

  %load1 = load double, ptr %x, align 8
  %load2 = load double, ptr %idx2, align 8

  %cmp1 = fcmp fast oge double %load1, 0.000000e+00
  %cmp2 = fcmp oge double %load2, 0.000000e+00

  %sub1 = fneg double %load1
  %sub2 = fneg double %load2

  %sel1 = select i1 %cmp1, double %load1, double %sub1
  %sel2 = select i1 %cmp2, double %load2, double %sub2

  store double %sel1, ptr %x, align 8
  store double %sel2, ptr %idx2, align 8

  ret void
}
declare double @llvm.fabs.f64(double) nounwind readnone

; Both scalar intrinsic calls are 'fast', so the vector call keeps 'fast'.
define void @call_fast(ptr %x) {
; CHECK-LABEL: @call_fast(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call fast <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[X]], align 8
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds double, ptr %x, i64 1

  %load1 = load double, ptr %x, align 8
  %load2 = load double, ptr %idx2, align 8

  %call1 = tail call fast double @llvm.fabs.f64(double %load1) nounwind readnone
  %call2 = tail call fast double @llvm.fabs.f64(double %load2) nounwind readnone

  store double %call1, ptr %x, align 8
  store double %call2, ptr %idx2, align 8

  ret void
}
; One scalar intrinsic call lacks 'fast', so the vector call must drop it.
define void @call_no_fast(ptr %x) {
; CHECK-LABEL: @call_no_fast(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[X]], align 8
; CHECK-NEXT:    ret void
;
  %idx2 = getelementptr inbounds double, ptr %x, i64 1

  %load1 = load double, ptr %x, align 8
  %load2 = load double, ptr %idx2, align 8

  %call1 = tail call fast double @llvm.fabs.f64(double %load1) nounwind readnone
  %call2 = tail call double @llvm.fabs.f64(double %load2) nounwind readnone

  store double %call1, ptr %x, align 8
  store double %call2, ptr %idx2, align 8

  ret void
}

attributes #1 = { "target-features"="+avx" }