1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512
; PR32368_128 - 128-bit variant: a <4 x float> value is masked with an integer
; AND, multiplied by 2.0, then masked again with a second integer AND.
; The two masks are bitwise complements of each other: -292 is 0xFFFFFEDC
; (printed as 4294967004 in the expected constants) and 291 is 0x00000123.
;
; What the expected assembly below pins down:
;  * both ANDs are emitted as floating-point-domain logic ops (andps / vandps);
;  * the multiply by 2.0 is emitted as an add of the register to itself;
;  * the default (SSE) and +avx runs read each mask as a constant-pool memory
;    operand, while the +avx2 and +avx512f runs first splat it into xmm1 with
;    vbroadcastss.
; NOTE(review): the name presumably refers to LLVM bug report 32368 - confirm.
7 define <4 x float> @PR32368_128(<4 x float>) {
8 ; SSE-LABEL: PR32368_128:
10 ; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11 ; SSE-NEXT: addps %xmm0, %xmm0
12 ; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15 ; AVX1-LABEL: PR32368_128:
17 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18 ; AVX1-NEXT: vaddps %xmm0, %xmm0, %xmm0
19 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22 ; AVX2-LABEL: PR32368_128:
24 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [4294967004,4294967004,4294967004,4294967004]
25 ; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
26 ; AVX2-NEXT: vaddps %xmm0, %xmm0, %xmm0
27 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [291,291,291,291]
28 ; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
31 ; AVX512-LABEL: PR32368_128:
33 ; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [4294967004,4294967004,4294967004,4294967004]
34 ; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
35 ; AVX512-NEXT: vaddps %xmm0, %xmm0, %xmm0
36 ; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [291,291,291,291]
37 ; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
; Step 1: view the float lanes as i32 and AND each lane with -292 (~0x123).
39 %2 = bitcast <4 x float> %0 to <4 x i32>
40 %3 = and <4 x i32> %2, <i32 -292, i32 -292, i32 -292, i32 -292>
; Step 2: back to float and multiply every lane by 2.0.
41 %4 = bitcast <4 x i32> %3 to <4 x float>
42 %5 = fmul <4 x float> %4, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
; Step 3: view the product as i32 again and AND each lane with 291 (0x123),
; then bitcast the result back to <4 x float>.
43 %6 = bitcast <4 x float> %5 to <4 x i32>
44 %7 = and <4 x i32> %6, <i32 291, i32 291, i32 291, i32 291>
45 %8 = bitcast <4 x i32> %7 to <4 x float>
; PR32368_256 - 256-bit variant of the same pattern on <8 x float>:
; AND with splat -292 (0xFFFFFEDC, printed as 4294967004), multiply by 2.0,
; AND with splat 291 (0x00000123). The two masks are bitwise complements.
;
; What the expected assembly below pins down:
;  * the default (SSE) run handles the vector as two xmm halves (xmm0, xmm1),
;    loading each mask once into xmm2 with movaps and reusing it for both;
;  * the +avx run works on ymm0 with the masks as constant-pool memory operands;
;  * the +avx2 and +avx512f runs splat each mask into ymm1 with vbroadcastss;
;  * in every run the ANDs are andps / vandps and the multiply by 2.0 is an
;    add of the register to itself.
49 define <8 x float> @PR32368_256(<8 x float>) {
50 ; SSE-LABEL: PR32368_256:
52 ; SSE-NEXT: movaps {{.*#+}} xmm2 = [4294967004,4294967004,4294967004,4294967004]
53 ; SSE-NEXT: andps %xmm2, %xmm0
54 ; SSE-NEXT: andps %xmm2, %xmm1
55 ; SSE-NEXT: addps %xmm1, %xmm1
56 ; SSE-NEXT: addps %xmm0, %xmm0
57 ; SSE-NEXT: movaps {{.*#+}} xmm2 = [291,291,291,291]
58 ; SSE-NEXT: andps %xmm2, %xmm0
59 ; SSE-NEXT: andps %xmm2, %xmm1
62 ; AVX1-LABEL: PR32368_256:
64 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
65 ; AVX1-NEXT: vaddps %ymm0, %ymm0, %ymm0
66 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
69 ; AVX2-LABEL: PR32368_256:
71 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [4294967004,4294967004,4294967004,4294967004,4294967004,4294967004,4294967004,4294967004]
72 ; AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0
73 ; AVX2-NEXT: vaddps %ymm0, %ymm0, %ymm0
74 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [291,291,291,291,291,291,291,291]
75 ; AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0
78 ; AVX512-LABEL: PR32368_256:
80 ; AVX512-NEXT: vbroadcastss {{.*#+}} ymm1 = [4294967004,4294967004,4294967004,4294967004,4294967004,4294967004,4294967004,4294967004]
81 ; AVX512-NEXT: vandps %ymm1, %ymm0, %ymm0
82 ; AVX512-NEXT: vaddps %ymm0, %ymm0, %ymm0
83 ; AVX512-NEXT: vbroadcastss {{.*#+}} ymm1 = [291,291,291,291,291,291,291,291]
84 ; AVX512-NEXT: vandps %ymm1, %ymm0, %ymm0
; Step 1: view the float lanes as i32 and AND each lane with -292 (~0x123).
86 %2 = bitcast <8 x float> %0 to <8 x i32>
87 %3 = and <8 x i32> %2, <i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292>
; Step 2: back to float and multiply every lane by 2.0.
88 %4 = bitcast <8 x i32> %3 to <8 x float>
89 %5 = fmul <8 x float> %4, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
; Step 3: view the product as i32 again and AND each lane with 291 (0x123),
; then bitcast the result back to <8 x float>.
90 %6 = bitcast <8 x float> %5 to <8 x i32>
91 %7 = and <8 x i32> %6, <i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291>
92 %8 = bitcast <8 x i32> %7 to <8 x float>
; PR32368_512 - 512-bit variant of the same pattern on <16 x float>:
; AND with splat -292 (0xFFFFFEDC, printed as 4294967004), multiply by 2.0,
; AND with splat 291 (0x00000123). The two masks are bitwise complements.
;
; What the expected assembly below pins down:
;  * the default (SSE) run handles the vector as four xmm parts (xmm0-xmm3),
;    loading each mask once into xmm4 with movaps;
;  * the +avx and +avx2 runs handle it as two ymm halves (ymm0, ymm1), with
;    each mask splatted into ymm2 by vbroadcastss;
;  * the +avx512f run works on a single zmm0 and, unlike the narrower cases
;    in the other runs, uses the integer-domain vpandd with a {1to16}
;    embedded-broadcast memory operand for both masks;
;  * in every run the multiply by 2.0 is an add of the register to itself.
96 define <16 x float> @PR32368_512(<16 x float>) {
97 ; SSE-LABEL: PR32368_512:
99 ; SSE-NEXT: movaps {{.*#+}} xmm4 = [4294967004,4294967004,4294967004,4294967004]
100 ; SSE-NEXT: andps %xmm4, %xmm0
101 ; SSE-NEXT: andps %xmm4, %xmm1
102 ; SSE-NEXT: andps %xmm4, %xmm2
103 ; SSE-NEXT: andps %xmm4, %xmm3
104 ; SSE-NEXT: addps %xmm3, %xmm3
105 ; SSE-NEXT: addps %xmm2, %xmm2
106 ; SSE-NEXT: addps %xmm1, %xmm1
107 ; SSE-NEXT: addps %xmm0, %xmm0
108 ; SSE-NEXT: movaps {{.*#+}} xmm4 = [291,291,291,291]
109 ; SSE-NEXT: andps %xmm4, %xmm0
110 ; SSE-NEXT: andps %xmm4, %xmm1
111 ; SSE-NEXT: andps %xmm4, %xmm2
112 ; SSE-NEXT: andps %xmm4, %xmm3
115 ; AVX1-LABEL: PR32368_512:
117 ; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [4294967004,4294967004,4294967004,4294967004,4294967004,4294967004,4294967004,4294967004]
118 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
119 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
120 ; AVX1-NEXT: vaddps %ymm1, %ymm1, %ymm1
121 ; AVX1-NEXT: vaddps %ymm0, %ymm0, %ymm0
122 ; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [291,291,291,291,291,291,291,291]
123 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
124 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
127 ; AVX2-LABEL: PR32368_512:
129 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [4294967004,4294967004,4294967004,4294967004,4294967004,4294967004,4294967004,4294967004]
130 ; AVX2-NEXT: vandps %ymm2, %ymm0, %ymm0
131 ; AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1
132 ; AVX2-NEXT: vaddps %ymm1, %ymm1, %ymm1
133 ; AVX2-NEXT: vaddps %ymm0, %ymm0, %ymm0
134 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [291,291,291,291,291,291,291,291]
135 ; AVX2-NEXT: vandps %ymm2, %ymm0, %ymm0
136 ; AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1
139 ; AVX512-LABEL: PR32368_512:
141 ; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
142 ; AVX512-NEXT: vaddps %zmm0, %zmm0, %zmm0
143 ; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; Step 1: view the float lanes as i32 and AND each lane with -292 (~0x123).
145 %2 = bitcast <16 x float> %0 to <16 x i32>
146 %3 = and <16 x i32> %2, <i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292>
; Step 2: back to float and multiply every lane by 2.0.
147 %4 = bitcast <16 x i32> %3 to <16 x float>
148 %5 = fmul <16 x float> %4, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
; Step 3: view the product as i32 again and AND each lane with 291 (0x123),
; then bitcast the result back to <16 x float>.
149 %6 = bitcast <16 x float> %5 to <16 x i32>
150 %7 = and <16 x i32> %6, <i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291>
151 %8 = bitcast <16 x i32> %7 to <16 x float>