1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X32-SSE --check-prefix=X32-SSE42
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE --check-prefix=X64-SSE42
6 ; AND/XOR/OR i24 as v3i8
; Test: bitcasts both i24 arguments to <3 x i8>, ANDs the two vectors, and
; bitcasts the result (%3) back to i24 as %4.
; The assertions below expect plain scalar code on both targets: a movl of one
; operand into %eax followed by a single 32-bit andl with the other operand
; (stack slots on i686, %edi/%esi on x86_64).
9 define i24 @and_i24_as_v3i8(i24 %a, i24 %b) nounwind {
10 ; X32-SSE-LABEL: and_i24_as_v3i8:
12 ; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
13 ; X32-SSE-NEXT: andl {{[0-9]+}}(%esp), %eax
16 ; X64-SSE-LABEL: and_i24_as_v3i8:
18 ; X64-SSE-NEXT: movl %edi, %eax
19 ; X64-SSE-NEXT: andl %esi, %eax
21 %1 = bitcast i24 %a to <3 x i8>
22 %2 = bitcast i24 %b to <3 x i8>
23 %3 = and <3 x i8> %1, %2
24 %4 = bitcast <3 x i8> %3 to i24
; Test: bitcasts both i24 arguments to <3 x i8>, XORs the two vectors, and
; bitcasts the result (%3) back to i24 as %4.
; The assertions below expect plain scalar code on both targets: a movl of one
; operand into %eax followed by a single 32-bit xorl with the other operand
; (stack slots on i686, %edi/%esi on x86_64).
28 define i24 @xor_i24_as_v3i8(i24 %a, i24 %b) nounwind {
29 ; X32-SSE-LABEL: xor_i24_as_v3i8:
31 ; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
32 ; X32-SSE-NEXT: xorl {{[0-9]+}}(%esp), %eax
35 ; X64-SSE-LABEL: xor_i24_as_v3i8:
37 ; X64-SSE-NEXT: movl %edi, %eax
38 ; X64-SSE-NEXT: xorl %esi, %eax
40 %1 = bitcast i24 %a to <3 x i8>
41 %2 = bitcast i24 %b to <3 x i8>
42 %3 = xor <3 x i8> %1, %2
43 %4 = bitcast <3 x i8> %3 to i24
; Test: bitcasts both i24 arguments to <3 x i8>, ORs the two vectors, and
; bitcasts the result (%3) back to i24 as %4.
; The assertions below expect plain scalar code on both targets: a movl of one
; operand into %eax followed by a single 32-bit orl with the other operand
; (stack slots on i686, %edi/%esi on x86_64).
47 define i24 @or_i24_as_v3i8(i24 %a, i24 %b) nounwind {
48 ; X32-SSE-LABEL: or_i24_as_v3i8:
50 ; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
51 ; X32-SSE-NEXT: orl {{[0-9]+}}(%esp), %eax
54 ; X64-SSE-LABEL: or_i24_as_v3i8:
56 ; X64-SSE-NEXT: movl %edi, %eax
57 ; X64-SSE-NEXT: orl %esi, %eax
59 %1 = bitcast i24 %a to <3 x i8>
60 %2 = bitcast i24 %b to <3 x i8>
61 %3 = or <3 x i8> %1, %2
62 %4 = bitcast <3 x i8> %3 to i24
67 ; AND/XOR/OR i24 as v8i3
; Test: bitcasts both i24 arguments to <8 x i3>, ANDs the two vectors, and
; bitcasts the result (%3) back to i24 as %4.
; The assertions below expect plain scalar code on both targets: a movl of one
; operand into %eax followed by a single 32-bit andl with the other operand
; (stack slots on i686, %edi/%esi on x86_64).
70 define i24 @and_i24_as_v8i3(i24 %a, i24 %b) nounwind {
71 ; X32-SSE-LABEL: and_i24_as_v8i3:
73 ; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
74 ; X32-SSE-NEXT: andl {{[0-9]+}}(%esp), %eax
77 ; X64-SSE-LABEL: and_i24_as_v8i3:
79 ; X64-SSE-NEXT: movl %edi, %eax
80 ; X64-SSE-NEXT: andl %esi, %eax
82 %1 = bitcast i24 %a to <8 x i3>
83 %2 = bitcast i24 %b to <8 x i3>
84 %3 = and <8 x i3> %1, %2
85 %4 = bitcast <8 x i3> %3 to i24
; Test: bitcasts both i24 arguments to <8 x i3>, XORs the two vectors, and
; bitcasts the result (%3) back to i24 as %4.
; The assertions below expect plain scalar code on both targets: a movl of one
; operand into %eax followed by a single 32-bit xorl with the other operand
; (stack slots on i686, %edi/%esi on x86_64).
89 define i24 @xor_i24_as_v8i3(i24 %a, i24 %b) nounwind {
90 ; X32-SSE-LABEL: xor_i24_as_v8i3:
92 ; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
93 ; X32-SSE-NEXT: xorl {{[0-9]+}}(%esp), %eax
96 ; X64-SSE-LABEL: xor_i24_as_v8i3:
98 ; X64-SSE-NEXT: movl %edi, %eax
99 ; X64-SSE-NEXT: xorl %esi, %eax
101 %1 = bitcast i24 %a to <8 x i3>
102 %2 = bitcast i24 %b to <8 x i3>
103 %3 = xor <8 x i3> %1, %2
104 %4 = bitcast <8 x i3> %3 to i24
; Test: bitcasts both i24 arguments to <8 x i3>, ORs the two vectors, and
; bitcasts the result (%3) back to i24 as %4.
; The assertions below expect plain scalar code on both targets: a movl of one
; operand into %eax followed by a single 32-bit orl with the other operand
; (stack slots on i686, %edi/%esi on x86_64).
108 define i24 @or_i24_as_v8i3(i24 %a, i24 %b) nounwind {
109 ; X32-SSE-LABEL: or_i24_as_v8i3:
111 ; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
112 ; X32-SSE-NEXT: orl {{[0-9]+}}(%esp), %eax
115 ; X64-SSE-LABEL: or_i24_as_v8i3:
117 ; X64-SSE-NEXT: movl %edi, %eax
118 ; X64-SSE-NEXT: orl %esi, %eax
120 %1 = bitcast i24 %a to <8 x i3>
121 %2 = bitcast i24 %b to <8 x i3>
122 %3 = or <8 x i3> %1, %2
123 %4 = bitcast <8 x i3> %3 to i24
128 ; AND/XOR/OR v3i8 as i24
; Test: bitcasts both <3 x i8> arguments to i24 (%1, %2) and bitcasts an i24
; value %3 back to <3 x i8> as %4.
; The i686 assertions expect each operand to be assembled in an XMM register
; (a movd from memory plus pinsrb into byte positions 4 and 8 from stack
; slots), the two registers combined with one pand, and the three result bytes
; read back with pextrb from positions 0, 4 and 8 into %eax, %edx and %ecx.
; The x86_64 assertions expect the same shape, except that each operand is
; built from general-purpose registers with movd plus pinsrd $1 / $2.
131 define <3 x i8> @and_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
132 ; X32-SSE-LABEL: and_v3i8_as_i24:
134 ; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
135 ; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
136 ; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
137 ; X32-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
138 ; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1
139 ; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1
140 ; X32-SSE-NEXT: pand %xmm0, %xmm1
141 ; X32-SSE-NEXT: pextrb $0, %xmm1, %eax
142 ; X32-SSE-NEXT: pextrb $4, %xmm1, %edx
143 ; X32-SSE-NEXT: pextrb $8, %xmm1, %ecx
144 ; X32-SSE-NEXT: # kill: def $al killed $al killed $eax
145 ; X32-SSE-NEXT: # kill: def $dl killed $dl killed $edx
146 ; X32-SSE-NEXT: # kill: def $cl killed $cl killed $ecx
149 ; X64-SSE-LABEL: and_v3i8_as_i24:
151 ; X64-SSE-NEXT: movd %ecx, %xmm0
152 ; X64-SSE-NEXT: pinsrd $1, %r8d, %xmm0
153 ; X64-SSE-NEXT: pinsrd $2, %r9d, %xmm0
154 ; X64-SSE-NEXT: movd %edi, %xmm1
155 ; X64-SSE-NEXT: pinsrd $1, %esi, %xmm1
156 ; X64-SSE-NEXT: pinsrd $2, %edx, %xmm1
157 ; X64-SSE-NEXT: pand %xmm0, %xmm1
158 ; X64-SSE-NEXT: pextrb $0, %xmm1, %eax
159 ; X64-SSE-NEXT: pextrb $4, %xmm1, %edx
160 ; X64-SSE-NEXT: pextrb $8, %xmm1, %ecx
161 ; X64-SSE-NEXT: # kill: def $al killed $al killed $eax
162 ; X64-SSE-NEXT: # kill: def $dl killed $dl killed $edx
163 ; X64-SSE-NEXT: # kill: def $cl killed $cl killed $ecx
165 %1 = bitcast <3 x i8> %a to i24
166 %2 = bitcast <3 x i8> %b to i24
; NOTE(review): no definition of %3 is visible in this excerpt; presumably it
; is an i24 'and' of %1 and %2 (suggested by the function name and the pand in
; the assertions above) - confirm against the full file.
168 %4 = bitcast i24 %3 to <3 x i8>
; Test: bitcasts both <3 x i8> arguments to i24 (%1, %2) and bitcasts an i24
; value %3 back to <3 x i8> as %4.
; The i686 assertions expect each operand to be assembled in an XMM register
; (a movd from memory plus pinsrb into byte positions 4 and 8 from stack
; slots), the two registers combined with one pxor, and the three result bytes
; read back with pextrb from positions 0, 4 and 8 into %eax, %edx and %ecx.
; The x86_64 assertions expect the same shape, except that each operand is
; built from general-purpose registers with movd plus pinsrd $1 / $2.
172 define <3 x i8> @xor_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
173 ; X32-SSE-LABEL: xor_v3i8_as_i24:
175 ; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
176 ; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
177 ; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
178 ; X32-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
179 ; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1
180 ; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1
181 ; X32-SSE-NEXT: pxor %xmm0, %xmm1
182 ; X32-SSE-NEXT: pextrb $0, %xmm1, %eax
183 ; X32-SSE-NEXT: pextrb $4, %xmm1, %edx
184 ; X32-SSE-NEXT: pextrb $8, %xmm1, %ecx
185 ; X32-SSE-NEXT: # kill: def $al killed $al killed $eax
186 ; X32-SSE-NEXT: # kill: def $dl killed $dl killed $edx
187 ; X32-SSE-NEXT: # kill: def $cl killed $cl killed $ecx
190 ; X64-SSE-LABEL: xor_v3i8_as_i24:
192 ; X64-SSE-NEXT: movd %ecx, %xmm0
193 ; X64-SSE-NEXT: pinsrd $1, %r8d, %xmm0
194 ; X64-SSE-NEXT: pinsrd $2, %r9d, %xmm0
195 ; X64-SSE-NEXT: movd %edi, %xmm1
196 ; X64-SSE-NEXT: pinsrd $1, %esi, %xmm1
197 ; X64-SSE-NEXT: pinsrd $2, %edx, %xmm1
198 ; X64-SSE-NEXT: pxor %xmm0, %xmm1
199 ; X64-SSE-NEXT: pextrb $0, %xmm1, %eax
200 ; X64-SSE-NEXT: pextrb $4, %xmm1, %edx
201 ; X64-SSE-NEXT: pextrb $8, %xmm1, %ecx
202 ; X64-SSE-NEXT: # kill: def $al killed $al killed $eax
203 ; X64-SSE-NEXT: # kill: def $dl killed $dl killed $edx
204 ; X64-SSE-NEXT: # kill: def $cl killed $cl killed $ecx
206 %1 = bitcast <3 x i8> %a to i24
207 %2 = bitcast <3 x i8> %b to i24
; NOTE(review): no definition of %3 is visible in this excerpt; presumably it
; is an i24 'xor' of %1 and %2 (suggested by the function name and the pxor in
; the assertions above) - confirm against the full file.
209 %4 = bitcast i24 %3 to <3 x i8>
; Test: bitcasts both <3 x i8> arguments to i24 (%1, %2) and bitcasts an i24
; value %3 back to <3 x i8> as %4.
; The i686 assertions expect each operand to be assembled in an XMM register
; (a movd from memory plus pinsrb into byte positions 4 and 8 from stack
; slots), the two registers combined with one por, and the three result bytes
; read back with pextrb from positions 0, 4 and 8 into %eax, %edx and %ecx.
; The x86_64 assertions expect the same shape, except that each operand is
; built from general-purpose registers with movd plus pinsrd $1 / $2.
213 define <3 x i8> @or_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
214 ; X32-SSE-LABEL: or_v3i8_as_i24:
216 ; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
217 ; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
218 ; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
219 ; X32-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
220 ; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1
221 ; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1
222 ; X32-SSE-NEXT: por %xmm0, %xmm1
223 ; X32-SSE-NEXT: pextrb $0, %xmm1, %eax
224 ; X32-SSE-NEXT: pextrb $4, %xmm1, %edx
225 ; X32-SSE-NEXT: pextrb $8, %xmm1, %ecx
226 ; X32-SSE-NEXT: # kill: def $al killed $al killed $eax
227 ; X32-SSE-NEXT: # kill: def $dl killed $dl killed $edx
228 ; X32-SSE-NEXT: # kill: def $cl killed $cl killed $ecx
231 ; X64-SSE-LABEL: or_v3i8_as_i24:
233 ; X64-SSE-NEXT: movd %ecx, %xmm0
234 ; X64-SSE-NEXT: pinsrd $1, %r8d, %xmm0
235 ; X64-SSE-NEXT: pinsrd $2, %r9d, %xmm0
236 ; X64-SSE-NEXT: movd %edi, %xmm1
237 ; X64-SSE-NEXT: pinsrd $1, %esi, %xmm1
238 ; X64-SSE-NEXT: pinsrd $2, %edx, %xmm1
239 ; X64-SSE-NEXT: por %xmm0, %xmm1
240 ; X64-SSE-NEXT: pextrb $0, %xmm1, %eax
241 ; X64-SSE-NEXT: pextrb $4, %xmm1, %edx
242 ; X64-SSE-NEXT: pextrb $8, %xmm1, %ecx
243 ; X64-SSE-NEXT: # kill: def $al killed $al killed $eax
244 ; X64-SSE-NEXT: # kill: def $dl killed $dl killed $edx
245 ; X64-SSE-NEXT: # kill: def $cl killed $cl killed $ecx
247 %1 = bitcast <3 x i8> %a to i24
248 %2 = bitcast <3 x i8> %b to i24
; NOTE(review): no definition of %3 is visible in this excerpt; presumably it
; is an i24 'or' of %1 and %2 (suggested by the function name and the por in
; the assertions above) - confirm against the full file.
250 %4 = bitcast i24 %3 to <3 x i8>
255 ; AND/XOR/OR v8i3 as i24
; Test: bitcasts both <8 x i3> arguments to i24 (%1, %2) and bitcasts an i24
; value %3 back to <8 x i3> as %4.
; The assertions for both targets expect a single andps of %xmm1 into %xmm0.
258 define <8 x i3> @and_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind {
259 ; X32-SSE-LABEL: and_v8i3_as_i24:
261 ; X32-SSE-NEXT: andps %xmm1, %xmm0
264 ; X64-SSE-LABEL: and_v8i3_as_i24:
266 ; X64-SSE-NEXT: andps %xmm1, %xmm0
268 %1 = bitcast <8 x i3> %a to i24
269 %2 = bitcast <8 x i3> %b to i24
; NOTE(review): no definition of %3 is visible in this excerpt; presumably it
; is an i24 'and' of %1 and %2 (suggested by the function name and the andps
; in the assertions above) - confirm against the full file.
271 %4 = bitcast i24 %3 to <8 x i3>
; Test: bitcasts both <8 x i3> arguments to i24 (%1, %2) and bitcasts an i24
; value %3 back to <8 x i3> as %4.
; The assertions for both targets expect a single xorps of %xmm1 into %xmm0.
275 define <8 x i3> @xor_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind {
276 ; X32-SSE-LABEL: xor_v8i3_as_i24:
278 ; X32-SSE-NEXT: xorps %xmm1, %xmm0
281 ; X64-SSE-LABEL: xor_v8i3_as_i24:
283 ; X64-SSE-NEXT: xorps %xmm1, %xmm0
285 %1 = bitcast <8 x i3> %a to i24
286 %2 = bitcast <8 x i3> %b to i24
; NOTE(review): no definition of %3 is visible in this excerpt; presumably it
; is an i24 'xor' of %1 and %2 (suggested by the function name and the xorps
; in the assertions above) - confirm against the full file.
288 %4 = bitcast i24 %3 to <8 x i3>
; Test: bitcasts both <8 x i3> arguments to i24 (%1, %2) and bitcasts an i24
; value %3 back to <8 x i3> as %4.
; The assertions for both targets expect a single orps of %xmm1 into %xmm0.
292 define <8 x i3> @or_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind {
293 ; X32-SSE-LABEL: or_v8i3_as_i24:
295 ; X32-SSE-NEXT: orps %xmm1, %xmm0
298 ; X64-SSE-LABEL: or_v8i3_as_i24:
300 ; X64-SSE-NEXT: orps %xmm1, %xmm0
302 %1 = bitcast <8 x i3> %a to i24
303 %2 = bitcast <8 x i3> %b to i24
; NOTE(review): no definition of %3 is visible in this excerpt; presumably it
; is an i24 'or' of %1 and %2 (suggested by the function name and the orps in
; the assertions above) - confirm against the full file.
305 %4 = bitcast i24 %3 to <8 x i3>