1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefixes=CHECK,CHECK-SSE
3 ; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX1
4 ; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX512
; Scalar i32 case. On every RUN configuration (shared CHECK prefix) the
; expected output negates the argument, scales it by 8 with `leal`, and masks
; the result with 56.
; NOTE(review): the instruction defining %mul is not visible in this excerpt;
; presumably a multiply by 56 as in the vector tests -- confirm.
6 define i32 @mul_and_to_neg_shl_and(i32 %x) {
7 ; CHECK-LABEL: mul_and_to_neg_shl_and:
9 ; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
10 ; CHECK-NEXT: negl %edi
11 ; CHECK-NEXT: leal (,%rdi,8), %eax
12 ; CHECK-NEXT: andl $56, %eax
15 %and = and i32 %mul, 56
; Scalar i32 case with a mask that has low bits set. The IR masks with 51, but
; the expected `andl` immediate is 48: the value was just scaled by 8, so its
; low three bits are zero and 51 and 48 select the same bits.
; NOTE(review): the instruction defining %mul is not visible in this excerpt;
; presumably a multiply by 56 as in the vector tests -- confirm.
19 define i32 @mul_and_to_neg_shl_and2(i32 %x) {
20 ; CHECK-LABEL: mul_and_to_neg_shl_and2:
22 ; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
23 ; CHECK-NEXT: negl %edi
24 ; CHECK-NEXT: leal (,%rdi,8), %eax
25 ; CHECK-NEXT: andl $48, %eax
28 %and = and i32 %mul, 51
; <4 x i32> case with splat constants: multiply by 56, then mask with 48.
; Expected output on all three configurations is a subtract from zero
; (pxor + psubd), a shift left by 3, and an `and` with a constant-pool operand.
; The AVX512 run expects the mask as a {1to4} broadcast memory operand.
32 define <4 x i32> @mul_and_to_neg_shl_and_vec(<4 x i32> %x) {
33 ; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec:
35 ; CHECK-SSE-NEXT: pxor %xmm1, %xmm1
36 ; CHECK-SSE-NEXT: psubd %xmm0, %xmm1
37 ; CHECK-SSE-NEXT: pslld $3, %xmm1
38 ; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
39 ; CHECK-SSE-NEXT: movdqa %xmm1, %xmm0
40 ; CHECK-SSE-NEXT: retq
42 ; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec:
43 ; CHECK-AVX1: # %bb.0:
44 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
45 ; CHECK-AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm0
46 ; CHECK-AVX1-NEXT: vpslld $3, %xmm0, %xmm0
47 ; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
48 ; CHECK-AVX1-NEXT: retq
50 ; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec:
51 ; CHECK-AVX512: # %bb.0:
52 ; CHECK-AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
53 ; CHECK-AVX512-NEXT: vpsubd %xmm0, %xmm1, %xmm0
54 ; CHECK-AVX512-NEXT: vpslld $3, %xmm0, %xmm0
55 ; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
56 ; CHECK-AVX512-NEXT: retq
57 %mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 56>
58 %and = and <4 x i32> %mul, <i32 48, i32 48, i32 48, i32 48>
; Negative test: the multiplier is not a splat (<56, 56, 56, 64>) while the
; mask is splat 48. The expected output keeps a real multiply (a pmuludq pair
; with shuffles on SSE, vpmulld on AVX1/AVX512) followed by the `and`; no
; negate + shift sequence is expected.
62 define <4 x i32> @mul_and_to_neg_shl_and_vec_fail_no_splat(<4 x i32> %x) {
63 ; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_fail_no_splat:
65 ; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
66 ; CHECK-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
67 ; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
68 ; CHECK-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
69 ; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
70 ; CHECK-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
71 ; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
72 ; CHECK-SSE-NEXT: retq
74 ; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec_fail_no_splat:
75 ; CHECK-AVX1: # %bb.0:
76 ; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
77 ; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
78 ; CHECK-AVX1-NEXT: retq
80 ; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec_fail_no_splat:
81 ; CHECK-AVX512: # %bb.0:
82 ; CHECK-AVX512-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
83 ; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
84 ; CHECK-AVX512-NEXT: retq
85 %mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 64>
86 %and = and <4 x i32> %mul, <i32 48, i32 48, i32 48, i32 48>
90 ;; The todo_no_splat tests below have the correct invariants in every element, but one constant vector is not a splat; their checks still expect a multiply.
; TODO-style test: non-splat multiplier (<56, 56, 56, 48>) with a splat mask
; of 48. The current expected output still performs the multiply (a pmuludq
; pair on SSE, vpmulld on AVX1/AVX512) and then the `and`.
91 define <4 x i32> @mul_and_to_neg_shl_and_vec_todo_no_splat1(<4 x i32> %x) {
92 ; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat1:
94 ; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
95 ; CHECK-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
96 ; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
97 ; CHECK-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
98 ; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
99 ; CHECK-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
100 ; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
101 ; CHECK-SSE-NEXT: retq
103 ; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat1:
104 ; CHECK-AVX1: # %bb.0:
105 ; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
106 ; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
107 ; CHECK-AVX1-NEXT: retq
109 ; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat1:
110 ; CHECK-AVX512: # %bb.0:
111 ; CHECK-AVX512-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
112 ; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
113 ; CHECK-AVX512-NEXT: retq
114 %mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 48>
115 %and = and <4 x i32> %mul, <i32 48, i32 48, i32 48, i32 48>
; TODO-style test: splat multiplier 56 with a non-splat mask
; (<48, 48, 48, 32>). The current expected output still performs the multiply.
; SSE loads [56,56,56,56] into a register for both pmuludq halves; AVX512
; takes the multiplier as a {1to4} broadcast and the mask as a full-width
; vpand memory operand.
119 define <4 x i32> @mul_and_to_neg_shl_and_vec_todo_no_splat2(<4 x i32> %x) {
120 ; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat2:
121 ; CHECK-SSE: # %bb.0:
122 ; CHECK-SSE-NEXT: movdqa {{.*#+}} xmm1 = [56,56,56,56]
123 ; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
124 ; CHECK-SSE-NEXT: pmuludq %xmm1, %xmm0
125 ; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
126 ; CHECK-SSE-NEXT: pmuludq %xmm1, %xmm2
127 ; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
128 ; CHECK-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
129 ; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
130 ; CHECK-SSE-NEXT: retq
132 ; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat2:
133 ; CHECK-AVX1: # %bb.0:
134 ; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
135 ; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
136 ; CHECK-AVX1-NEXT: retq
138 ; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat2:
139 ; CHECK-AVX512: # %bb.0:
140 ; CHECK-AVX512-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
141 ; CHECK-AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
142 ; CHECK-AVX512-NEXT: retq
143 %mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 56>
144 %and = and <4 x i32> %mul, <i32 48, i32 48, i32 48, i32 32>
; Multiplier is 56 in lanes 0-2 with an undef lane 3; the mask is splat 48.
; The expected output is the same subtract-from-zero, shift-left-by-3, `and`
; sequence as the all-splat vector test, so the undef lane does not prevent
; the negate + shift form.
148 define <4 x i32> @mul_and_to_neg_shl_and_vec_with_undef_mul(<4 x i32> %x) {
149 ; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_with_undef_mul:
150 ; CHECK-SSE: # %bb.0:
151 ; CHECK-SSE-NEXT: pxor %xmm1, %xmm1
152 ; CHECK-SSE-NEXT: psubd %xmm0, %xmm1
153 ; CHECK-SSE-NEXT: pslld $3, %xmm1
154 ; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
155 ; CHECK-SSE-NEXT: movdqa %xmm1, %xmm0
156 ; CHECK-SSE-NEXT: retq
158 ; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec_with_undef_mul:
159 ; CHECK-AVX1: # %bb.0:
160 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
161 ; CHECK-AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm0
162 ; CHECK-AVX1-NEXT: vpslld $3, %xmm0, %xmm0
163 ; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
164 ; CHECK-AVX1-NEXT: retq
166 ; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec_with_undef_mul:
167 ; CHECK-AVX512: # %bb.0:
168 ; CHECK-AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
169 ; CHECK-AVX512-NEXT: vpsubd %xmm0, %xmm1, %xmm0
170 ; CHECK-AVX512-NEXT: vpslld $3, %xmm0, %xmm0
171 ; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
172 ; CHECK-AVX512-NEXT: retq
173 %mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 undef>
174 %and = and <4 x i32> %mul, <i32 48, i32 48, i32 48, i32 48>
; Multiplier is splat 56; the mask is 48 in lanes 0-2 with an undef lane 3.
; The expected output is the same subtract-from-zero, shift-left-by-3, `and`
; sequence as the all-splat vector test, so the undef mask lane does not
; prevent the negate + shift form.
178 define <4 x i32> @mul_and_to_neg_shl_and_vec_with_undef_and(<4 x i32> %x) {
179 ; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_with_undef_and:
180 ; CHECK-SSE: # %bb.0:
181 ; CHECK-SSE-NEXT: pxor %xmm1, %xmm1
182 ; CHECK-SSE-NEXT: psubd %xmm0, %xmm1
183 ; CHECK-SSE-NEXT: pslld $3, %xmm1
184 ; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
185 ; CHECK-SSE-NEXT: movdqa %xmm1, %xmm0
186 ; CHECK-SSE-NEXT: retq
188 ; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec_with_undef_and:
189 ; CHECK-AVX1: # %bb.0:
190 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
191 ; CHECK-AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm0
192 ; CHECK-AVX1-NEXT: vpslld $3, %xmm0, %xmm0
193 ; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
194 ; CHECK-AVX1-NEXT: retq
196 ; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec_with_undef_and:
197 ; CHECK-AVX512: # %bb.0:
198 ; CHECK-AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
199 ; CHECK-AVX512-NEXT: vpsubd %xmm0, %xmm1, %xmm0
200 ; CHECK-AVX512-NEXT: vpslld $3, %xmm0, %xmm0
201 ; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
202 ; CHECK-AVX512-NEXT: retq
203 %mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 56>
204 %and = and <4 x i32> %mul, <i32 48, i32 48, i32 48, i32 undef>
; <16 x i8> case with undef lanes in both constants: the multiplier is 12 with
; lane 4 undef, the mask is 11 with lane 1 undef. The expected output is a
; byte-wise subtract from zero (psubb), `psllw $2`, and an `and` with a
; constant-pool operand. Both AVX configurations share the CHECK-AVX prefix
; here, so they expect identical code.
208 define <16 x i8> @mul_and_to_neg_shl_and_vec_with_undef_mul_and(<16 x i8> %x) {
209 ; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_with_undef_mul_and:
210 ; CHECK-SSE: # %bb.0:
211 ; CHECK-SSE-NEXT: pxor %xmm1, %xmm1
212 ; CHECK-SSE-NEXT: psubb %xmm0, %xmm1
213 ; CHECK-SSE-NEXT: psllw $2, %xmm1
214 ; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
215 ; CHECK-SSE-NEXT: movdqa %xmm1, %xmm0
216 ; CHECK-SSE-NEXT: retq
218 ; CHECK-AVX-LABEL: mul_and_to_neg_shl_and_vec_with_undef_mul_and:
219 ; CHECK-AVX: # %bb.0:
220 ; CHECK-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
221 ; CHECK-AVX-NEXT: vpsubb %xmm0, %xmm1, %xmm0
222 ; CHECK-AVX-NEXT: vpsllw $2, %xmm0, %xmm0
223 ; CHECK-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
224 ; CHECK-AVX-NEXT: retq
225 %mul = mul <16 x i8> %x, <i8 12, i8 12, i8 12, i8 12, i8 undef, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12>
226 %and = and <16 x i8> %mul, <i8 11, i8 undef, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11>
; Negative test: multiply by 57, then mask with 56. The expected output keeps
; the multiply as `imull $57` followed by `andl $56`; no negate + shift
; sequence is expected.
230 define i32 @mul_and_to_neg_shl_and_fail_invalid_mul(i32 %x) {
231 ; CHECK-LABEL: mul_and_to_neg_shl_and_fail_invalid_mul:
233 ; CHECK-NEXT: imull $57, %edi, %eax
234 ; CHECK-NEXT: andl $56, %eax
236 %mul = mul i32 %x, 57
237 %and = and i32 %mul, 56
; Negative test: the multiplier is 64 and the mask is 64. The expected output
; is a plain `shll $6` followed by `andl $64`, with no `negl`.
241 define i32 @mul_and_to_neg_shl_and_fail_mul_p2(i32 %x) {
242 ; CHECK-LABEL: mul_and_to_neg_shl_and_fail_mul_p2:
244 ; CHECK-NEXT: movl %edi, %eax
245 ; CHECK-NEXT: shll $6, %eax
246 ; CHECK-NEXT: andl $64, %eax
248 %mul = mul i32 %x, 64
249 %and = and i32 %mul, 64
253 define i32 @mul_and_to_neg_shl_and_fail_mask_to_large(i32 %x) {
254 ; CHECK-LABEL: mul_and_to_neg_shl_and_fail_mask_to_large:
256 ; CHECK-NEXT: imull $56, %edi, %eax
257 ; CHECK-NEXT: andl $120, %eax
259 %mul = mul i32 %x, 56
260 %and = and i32 %mul, 120