; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86

declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>)
declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)

; All four versions are semantically equivalent and should produce the same asm as the scalar version.

define i1 @intrinsic_v2i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: intrinsic_v2i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movzwl (%rsi), %eax
; X64-NEXT:    cmpw (%rdi), %ax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: intrinsic_v2i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl (%ecx), %ecx
; X86-NEXT:    cmpw (%eax), %cx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <2 x i8>, ptr %arg1, align 1
  %rhs = load <2 x i8>, ptr %arg, align 1
  %cmp = icmp eq <2 x i8> %lhs, %rhs
  %all_eq = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %cmp)
  ret i1 %all_eq
}

define i1 @intrinsic_v4i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: intrinsic_v4i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movl (%rsi), %eax
; X64-NEXT:    cmpl (%rdi), %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: intrinsic_v4i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    cmpl (%eax), %ecx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <4 x i8>, ptr %arg1, align 1
  %rhs = load <4 x i8>, ptr %arg, align 1
  %cmp = icmp eq <4 x i8> %lhs, %rhs
  %all_eq = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %cmp)
  ret i1 %all_eq
}

define i1 @intrinsic_v8i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: intrinsic_v8i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movq (%rsi), %rax
; X64-NEXT:    cmpq (%rdi), %rax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: intrinsic_v8i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %edx
; X86-NEXT:    movl 4(%ecx), %ecx
; X86-NEXT:    xorl 4(%eax), %ecx
; X86-NEXT:    xorl (%eax), %edx
; X86-NEXT:    orl %ecx, %edx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <8 x i8>, ptr %arg1, align 1
  %rhs = load <8 x i8>, ptr %arg, align 1
  %cmp = icmp eq <8 x i8> %lhs, %rhs
  %all_eq = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %cmp)
  ret i1 %all_eq
}

define i1 @vector_version_v2i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: vector_version_v2i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movzwl (%rsi), %eax
; X64-NEXT:    cmpw (%rdi), %ax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: vector_version_v2i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl (%ecx), %ecx
; X86-NEXT:    cmpw (%eax), %cx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <2 x i8>, ptr %arg1, align 1
  %rhs = load <2 x i8>, ptr %arg, align 1
  %any_ne = icmp ne <2 x i8> %lhs, %rhs
  %any_ne_scalar = bitcast <2 x i1> %any_ne to i2
  %all_eq = icmp eq i2 %any_ne_scalar, 0
  ret i1 %all_eq
}

define i1 @vector_version_v4i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: vector_version_v4i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movl (%rsi), %eax
; X64-NEXT:    cmpl (%rdi), %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: vector_version_v4i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    cmpl (%eax), %ecx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <4 x i8>, ptr %arg1, align 1
  %rhs = load <4 x i8>, ptr %arg, align 1
  %any_ne = icmp ne <4 x i8> %lhs, %rhs
  %any_ne_scalar = bitcast <4 x i1> %any_ne to i4
  %all_eq = icmp eq i4 %any_ne_scalar, 0
  ret i1 %all_eq
}

define i1 @vector_version_v8i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: vector_version_v8i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movq (%rsi), %rax
; X64-NEXT:    cmpq (%rdi), %rax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: vector_version_v8i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %edx
; X86-NEXT:    movl 4(%ecx), %ecx
; X86-NEXT:    xorl 4(%eax), %ecx
; X86-NEXT:    xorl (%eax), %edx
; X86-NEXT:    orl %ecx, %edx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <8 x i8>, ptr %arg1, align 1
  %rhs = load <8 x i8>, ptr %arg, align 1
  %any_ne = icmp ne <8 x i8> %lhs, %rhs
  %any_ne_scalar = bitcast <8 x i1> %any_ne to i8
  %all_eq = icmp eq i8 %any_ne_scalar, 0
  ret i1 %all_eq
}

define i1 @mixed_version_v2i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: mixed_version_v2i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movzwl (%rsi), %eax
; X64-NEXT:    cmpw (%rdi), %ax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: mixed_version_v2i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl (%ecx), %ecx
; X86-NEXT:    cmpw (%eax), %cx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <2 x i8>, ptr %arg1, align 1
  %rhs = load <2 x i8>, ptr %arg, align 1
  %lhs_s = bitcast <2 x i8> %lhs to i16
  %rhs_s = bitcast <2 x i8> %rhs to i16
  %all_eq = icmp eq i16 %lhs_s, %rhs_s
  ret i1 %all_eq
}

define i1 @mixed_version_v4i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: mixed_version_v4i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movl (%rsi), %eax
; X64-NEXT:    cmpl (%rdi), %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: mixed_version_v4i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    cmpl (%eax), %ecx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <4 x i8>, ptr %arg1, align 1
  %rhs = load <4 x i8>, ptr %arg, align 1
  %lhs_s = bitcast <4 x i8> %lhs to i32
  %rhs_s = bitcast <4 x i8> %rhs to i32
  %all_eq = icmp eq i32 %lhs_s, %rhs_s
  ret i1 %all_eq
}

define i1 @mixed_version_v8i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: mixed_version_v8i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movq (%rsi), %rax
; X64-NEXT:    cmpq (%rdi), %rax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: mixed_version_v8i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %edx
; X86-NEXT:    movl 4(%ecx), %ecx
; X86-NEXT:    xorl 4(%eax), %ecx
; X86-NEXT:    xorl (%eax), %edx
; X86-NEXT:    orl %ecx, %edx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <8 x i8>, ptr %arg1, align 1
  %rhs = load <8 x i8>, ptr %arg, align 1
  %lhs_s = bitcast <8 x i8> %lhs to i64
  %rhs_s = bitcast <8 x i8> %rhs to i64
  %all_eq = icmp eq i64 %lhs_s, %rhs_s
  ret i1 %all_eq
}

define i1 @scalar_version_i16(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: scalar_version_i16:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movzwl (%rsi), %eax
; X64-NEXT:    cmpw (%rdi), %ax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_version_i16:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl (%ecx), %ecx
; X86-NEXT:    cmpw (%eax), %cx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load i16, ptr %arg1, align 1
  %rhs = load i16, ptr %arg, align 1
  %all_eq = icmp eq i16 %lhs, %rhs
  ret i1 %all_eq
}

define i1 @scalar_version_i32(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: scalar_version_i32:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movl (%rsi), %eax
; X64-NEXT:    cmpl (%rdi), %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_version_i32:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    cmpl (%eax), %ecx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load i32, ptr %arg1, align 1
  %rhs = load i32, ptr %arg, align 1
  %all_eq = icmp eq i32 %lhs, %rhs
  ret i1 %all_eq
}

define i1 @scalar_version_i64(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: scalar_version_i64:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movq (%rsi), %rax
; X64-NEXT:    cmpq (%rdi), %rax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_version_i64:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %edx
; X86-NEXT:    movl 4(%ecx), %ecx
; X86-NEXT:    xorl 4(%eax), %ecx
; X86-NEXT:    xorl (%eax), %edx
; X86-NEXT:    orl %ecx, %edx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load i64, ptr %arg1, align 1
  %rhs = load i64, ptr %arg, align 1
  %all_eq = icmp eq i64 %lhs, %rhs
  ret i1 %all_eq
}