1 ; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
2 ; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
; shift2i16: lshr of <2 x i16> by a per-lane variable amount (%b).
; The cost-model check expects a cost of 32 for the lshr; the codegen check
; only anchors on the function label.
; NOTE(review): no instruction pattern or `ret` is visible for this function
; in this view -- confirm against the complete test file.
4 %shifttype = type <2 x i16>
5 define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
7 ; SSE2-LABEL: shift2i16
8 ; SSE2: cost of 32 {{.*}} lshr
9 ; SSE2-CODEGEN-LABEL: shift2i16
12 %0 = lshr %shifttype %a , %b
; shift4i16: lshr of <4 x i16> by a per-lane variable amount (%b).
; The cost-model check expects a cost of 32 for the lshr; the codegen check
; only anchors on the function label.
; NOTE(review): no instruction pattern or `ret` is visible for this function
; in this view -- confirm against the complete test file.
16 %shifttype4i16 = type <4 x i16>
17 define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
19 ; SSE2-LABEL: shift4i16
20 ; SSE2: cost of 32 {{.*}} lshr
21 ; SSE2-CODEGEN-LABEL: shift4i16
24 %0 = lshr %shifttype4i16 %a , %b
; shift8i16: lshr of <8 x i16> by a per-lane variable amount (%b).
; The cost-model check expects a cost of 32 for the lshr; the codegen check
; only anchors on the function label.
; NOTE(review): no instruction pattern or `ret` is visible for this function
; in this view -- confirm against the complete test file.
28 %shifttype8i16 = type <8 x i16>
29 define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) {
31 ; SSE2-LABEL: shift8i16
32 ; SSE2: cost of 32 {{.*}} lshr
33 ; SSE2-CODEGEN-LABEL: shift8i16
36 %0 = lshr %shifttype8i16 %a , %b
; shift16i16: lshr of <16 x i16> by a per-lane variable amount (%b).
; The cost-model check expects a cost of 64, i.e. twice the <8 x i16>
; expectation in this file; the codegen check only anchors on the label.
; NOTE(review): no instruction pattern is visible after the codegen label in
; this view -- confirm against the complete test file.
40 %shifttype16i16 = type <16 x i16>
41 define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) {
43 ; SSE2-LABEL: shift16i16
44 ; SSE2: cost of 64 {{.*}} lshr
45 ; SSE2-CODEGEN-LABEL: shift16i16
48 %0 = lshr %shifttype16i16 %a , %b
49 ret %shifttype16i16 %0
; shift32i16: lshr of <32 x i16> by a per-lane variable amount (%b).
; The cost-model check expects a cost of 128, i.e. four times the <8 x i16>
; expectation in this file; the codegen check only anchors on the label.
; NOTE(review): no instruction pattern is visible after the codegen label in
; this view -- confirm against the complete test file.
52 %shifttype32i16 = type <32 x i16>
53 define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) {
55 ; SSE2-LABEL: shift32i16
56 ; SSE2: cost of 128 {{.*}} lshr
57 ; SSE2-CODEGEN-LABEL: shift32i16
60 %0 = lshr %shifttype32i16 %a , %b
61 ret %shifttype32i16 %0
; shift2i32: lshr of <2 x i32> by a per-lane variable amount (%b).
; The cost-model check expects a cost of 16 for the lshr; the codegen check
; only anchors on the function label.
; NOTE(review): no instruction pattern or `ret` is visible for this function
; in this view -- confirm against the complete test file.
64 %shifttype2i32 = type <2 x i32>
65 define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
67 ; SSE2-LABEL: shift2i32
68 ; SSE2: cost of 16 {{.*}} lshr
69 ; SSE2-CODEGEN-LABEL: shift2i32
72 %0 = lshr %shifttype2i32 %a , %b
; shift4i32: lshr of <4 x i32> by a per-lane variable amount (%b).
; The cost-model check expects a cost of 16 for the lshr; the codegen check
; only anchors on the function label.
; NOTE(review): no instruction pattern or `ret` is visible for this function
; in this view -- confirm against the complete test file.
76 %shifttype4i32 = type <4 x i32>
77 define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
79 ; SSE2-LABEL: shift4i32
80 ; SSE2: cost of 16 {{.*}} lshr
81 ; SSE2-CODEGEN-LABEL: shift4i32
84 %0 = lshr %shifttype4i32 %a , %b
; shift8i32: lshr of <8 x i32> by a per-lane variable amount (%b).
; The cost-model check expects a cost of 32, i.e. twice the <4 x i32>
; expectation in this file; the codegen check only anchors on the label.
; NOTE(review): no instruction pattern or `ret` is visible for this function
; in this view -- confirm against the complete test file.
88 %shifttype8i32 = type <8 x i32>
89 define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
91 ; SSE2-LABEL: shift8i32
92 ; SSE2: cost of 32 {{.*}} lshr
93 ; SSE2-CODEGEN-LABEL: shift8i32
96 %0 = lshr %shifttype8i32 %a , %b
; shift16i32: lshr of <16 x i32> by a per-lane variable amount (%b).
; The cost-model check expects a cost of 64, i.e. four times the <4 x i32>
; expectation in this file; the generated code must contain a psrld.
100 %shifttype16i32 = type <16 x i32>
101 define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
103 ; SSE2-LABEL: shift16i32
104 ; SSE2: cost of 64 {{.*}} lshr
105 ; SSE2-CODEGEN-LABEL: shift16i32
106 ; SSE2-CODEGEN: psrld
108 %0 = lshr %shifttype16i32 %a , %b
109 ret %shifttype16i32 %0
; shift32i32: lshr of <32 x i32> by a per-lane variable amount (%b).
; The cost-model check expects a cost of 128, i.e. eight times the <4 x i32>
; expectation in this file; the generated code must contain a psrld.
112 %shifttype32i32 = type <32 x i32>
113 define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
115 ; SSE2-LABEL: shift32i32
116 ; SSE2: cost of 128 {{.*}} lshr
117 ; SSE2-CODEGEN-LABEL: shift32i32
118 ; SSE2-CODEGEN: psrld
120 %0 = lshr %shifttype32i32 %a , %b
121 ret %shifttype32i32 %0
; shift2i64: lshr of <2 x i64> by a per-lane variable amount (%b).
; The cost-model check expects a cost of 4 for the lshr; the generated code
; must contain a psrlq.
124 %shifttype2i64 = type <2 x i64>
125 define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
127 ; SSE2-LABEL: shift2i64
128 ; SSE2: cost of 4 {{.*}} lshr
129 ; SSE2-CODEGEN-LABEL: shift2i64
130 ; SSE2-CODEGEN: psrlq
132 %0 = lshr %shifttype2i64 %a , %b
133 ret %shifttype2i64 %0
; shift4i64: lshr of <4 x i64> by a per-lane variable amount (%b).
; The cost-model check expects a cost of 8, i.e. twice the <2 x i64>
; expectation in this file; the generated code must contain a psrlq.
136 %shifttype4i64 = type <4 x i64>
137 define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
139 ; SSE2-LABEL: shift4i64
140 ; SSE2: cost of 8 {{.*}} lshr
141 ; SSE2-CODEGEN-LABEL: shift4i64
142 ; SSE2-CODEGEN: psrlq
144 %0 = lshr %shifttype4i64 %a , %b
145 ret %shifttype4i64 %0
; shift8i64: lshr of <8 x i64> by a per-lane variable amount (%b).
; The cost-model check expects a cost of 16, i.e. four times the <2 x i64>
; expectation in this file; the generated code must contain a psrlq.
148 %shifttype8i64 = type <8 x i64>
149 define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
151 ; SSE2-LABEL: shift8i64
152 ; SSE2: cost of 16 {{.*}} lshr
153 ; SSE2-CODEGEN-LABEL: shift8i64
154 ; SSE2-CODEGEN: psrlq
156 %0 = lshr %shifttype8i64 %a , %b
157 ret %shifttype8i64 %0
; shift16i64: lshr of <16 x i64> by a per-lane variable amount (%b).
; The cost-model check expects a cost of 32, i.e. eight times the <2 x i64>
; expectation in this file; the generated code must contain a psrlq.
160 %shifttype16i64 = type <16 x i64>
161 define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
163 ; SSE2-LABEL: shift16i64
164 ; SSE2: cost of 32 {{.*}} lshr
165 ; SSE2-CODEGEN-LABEL: shift16i64
166 ; SSE2-CODEGEN: psrlq
168 %0 = lshr %shifttype16i64 %a , %b
169 ret %shifttype16i64 %0
; shift32i64: lshr of <32 x i64> by a per-lane variable amount (%b).
; The cost-model check expects a cost of 64, i.e. sixteen times the <2 x i64>
; expectation in this file; the generated code must contain a psrlq.
172 %shifttype32i64 = type <32 x i64>
173 define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
175 ; SSE2-LABEL: shift32i64
176 ; SSE2: cost of 64 {{.*}} lshr
177 ; SSE2-CODEGEN-LABEL: shift32i64
178 ; SSE2-CODEGEN: psrlq
180 %0 = lshr %shifttype32i64 %a , %b
181 ret %shifttype32i64 %0
; shift2i8: lshr of <2 x i8> by a per-lane variable amount (%b).
; The cost-model check expects a cost of 26 for the lshr; the generated code
; must contain a psrlw.
; NOTE(review): no `ret` is visible for this function in this view -- confirm
; against the complete test file.
184 %shifttype2i8 = type <2 x i8>
185 define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
187 ; SSE2-LABEL: shift2i8
188 ; SSE2: cost of 26 {{.*}} lshr
189 ; SSE2-CODEGEN-LABEL: shift2i8
190 ; SSE2-CODEGEN: psrlw
192 %0 = lshr %shifttype2i8 %a , %b
; shift4i8: lshr of <4 x i8> by a per-lane variable amount (%b).
; The cost-model check expects a cost of 26 for the lshr; the generated code
; must contain a psrlw.
; NOTE(review): no `ret` is visible for this function in this view -- confirm
; against the complete test file.
196 %shifttype4i8 = type <4 x i8>
197 define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
199 ; SSE2-LABEL: shift4i8
200 ; SSE2: cost of 26 {{.*}} lshr
201 ; SSE2-CODEGEN-LABEL: shift4i8
202 ; SSE2-CODEGEN: psrlw
204 %0 = lshr %shifttype4i8 %a , %b
; shift8i8: lshr of <8 x i8> by a per-lane variable amount (%b).
; The cost-model check expects a cost of 26 for the lshr; the generated code
; must contain a psrlw.
; NOTE(review): no `ret` is visible for this function in this view -- confirm
; against the complete test file.
208 %shifttype8i8 = type <8 x i8>
209 define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) {
211 ; SSE2-LABEL: shift8i8
212 ; SSE2: cost of 26 {{.*}} lshr
213 ; SSE2-CODEGEN-LABEL: shift8i8
214 ; SSE2-CODEGEN: psrlw
216 %0 = lshr %shifttype8i8 %a , %b
; shift16i8: lshr of <16 x i8> by a per-lane variable amount (%b).
; The cost-model check expects a cost of 26 for the lshr; the generated code
; must contain a psrlw.
220 %shifttype16i8 = type <16 x i8>
221 define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) {
223 ; SSE2-LABEL: shift16i8
224 ; SSE2: cost of 26 {{.*}} lshr
225 ; SSE2-CODEGEN-LABEL: shift16i8
226 ; SSE2-CODEGEN: psrlw
228 %0 = lshr %shifttype16i8 %a , %b
229 ret %shifttype16i8 %0
; shift32i8: lshr of <32 x i8> by a per-lane variable amount (%b).
; The cost-model check expects a cost of 52, i.e. twice the <16 x i8>
; expectation in this file; the generated code must contain a psrlw.
232 %shifttype32i8 = type <32 x i8>
233 define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) {
235 ; SSE2-LABEL: shift32i8
236 ; SSE2: cost of 52 {{.*}} lshr
237 ; SSE2-CODEGEN-LABEL: shift32i8
238 ; SSE2-CODEGEN: psrlw
240 %0 = lshr %shifttype32i8 %a , %b
241 ret %shifttype32i8 %0
244 ; Test shift by a constant vector.
; shift2i16const: lshr of <2 x i16> by the constant splat <3, 3>; the %b
; argument is not used by the shift.
; The cost-model check expects a cost of 1; the generated code must contain
; `psrlw $3` (shift by an immediate).
; NOTE(review): no `ret` is visible for this function in this view -- confirm
; against the complete test file.
246 %shifttypec = type <2 x i16>
247 define %shifttypec @shift2i16const(%shifttypec %a, %shifttypec %b) {
249 ; SSE2-LABEL: shift2i16const
250 ; SSE2: cost of 1 {{.*}} lshr
251 ; SSE2-CODEGEN-LABEL: shift2i16const
252 ; SSE2-CODEGEN: psrlw $3
254 %0 = lshr %shifttypec %a , <i16 3, i16 3>
; shift4i16const: lshr of <4 x i16> by a constant splat of 3; the %b argument
; is not used by the shift.
; The cost-model check expects a cost of 1; the generated code must contain
; `psrlw $3` (shift by an immediate).
258 %shifttypec4i16 = type <4 x i16>
259 define %shifttypec4i16 @shift4i16const(%shifttypec4i16 %a, %shifttypec4i16 %b) {
261 ; SSE2-LABEL: shift4i16const
262 ; SSE2: cost of 1 {{.*}} lshr
263 ; SSE2-CODEGEN-LABEL: shift4i16const
264 ; SSE2-CODEGEN: psrlw $3
266 %0 = lshr %shifttypec4i16 %a , <i16 3, i16 3, i16 3, i16 3>
267 ret %shifttypec4i16 %0
; shift8i16const: lshr of <8 x i16> by a constant splat of 3; the %b argument
; is not used by the shift.
; The cost-model check expects a cost of 1; the generated code must contain
; `psrlw $3` (shift by an immediate).
270 %shifttypec8i16 = type <8 x i16>
271 define %shifttypec8i16 @shift8i16const(%shifttypec8i16 %a, %shifttypec8i16 %b) {
273 ; SSE2-LABEL: shift8i16const
274 ; SSE2: cost of 1 {{.*}} lshr
275 ; SSE2-CODEGEN-LABEL: shift8i16const
276 ; SSE2-CODEGEN: psrlw $3
278 %0 = lshr %shifttypec8i16 %a , <i16 3, i16 3, i16 3, i16 3,
279 i16 3, i16 3, i16 3, i16 3>
280 ret %shifttypec8i16 %0
; shift16i16const: lshr of <16 x i16> by a constant splat of 3; the %b
; argument is not used by the shift.
; The cost-model check expects a cost of 2, i.e. twice the <8 x i16> constant
; expectation in this file; the generated code must contain `psrlw $3`.
283 %shifttypec16i16 = type <16 x i16>
284 define %shifttypec16i16 @shift16i16const(%shifttypec16i16 %a,
285 %shifttypec16i16 %b) {
287 ; SSE2-LABEL: shift16i16const
288 ; SSE2: cost of 2 {{.*}} lshr
289 ; SSE2-CODEGEN-LABEL: shift16i16const
290 ; SSE2-CODEGEN: psrlw $3
292 %0 = lshr %shifttypec16i16 %a , <i16 3, i16 3, i16 3, i16 3,
293 i16 3, i16 3, i16 3, i16 3,
294 i16 3, i16 3, i16 3, i16 3,
295 i16 3, i16 3, i16 3, i16 3>
296 ret %shifttypec16i16 %0
; shift32i16const: lshr of <32 x i16> by a constant splat of 3; the %b
; argument is not used by the shift.
; The cost-model check expects a cost of 4, i.e. four times the <8 x i16>
; constant expectation in this file; the generated code must contain
; `psrlw $3`.
299 %shifttypec32i16 = type <32 x i16>
300 define %shifttypec32i16 @shift32i16const(%shifttypec32i16 %a,
301 %shifttypec32i16 %b) {
303 ; SSE2-LABEL: shift32i16const
304 ; SSE2: cost of 4 {{.*}} lshr
305 ; SSE2-CODEGEN-LABEL: shift32i16const
306 ; SSE2-CODEGEN: psrlw $3
308 %0 = lshr %shifttypec32i16 %a , <i16 3, i16 3, i16 3, i16 3,
309 i16 3, i16 3, i16 3, i16 3,
310 i16 3, i16 3, i16 3, i16 3,
311 i16 3, i16 3, i16 3, i16 3,
312 i16 3, i16 3, i16 3, i16 3,
313 i16 3, i16 3, i16 3, i16 3,
314 i16 3, i16 3, i16 3, i16 3,
315 i16 3, i16 3, i16 3, i16 3>
316 ret %shifttypec32i16 %0
; shift2i32c: lshr of <2 x i32> by the constant splat <3, 3>; the %b argument
; is not used by the shift.
; The cost-model check expects a cost of 1; the generated code must contain
; `psrld $3` (shift by an immediate).
319 %shifttypec2i32 = type <2 x i32>
320 define %shifttypec2i32 @shift2i32c(%shifttypec2i32 %a, %shifttypec2i32 %b) {
322 ; SSE2-LABEL: shift2i32c
323 ; SSE2: cost of 1 {{.*}} lshr
324 ; SSE2-CODEGEN-LABEL: shift2i32c
325 ; SSE2-CODEGEN: psrld $3
327 %0 = lshr %shifttypec2i32 %a , <i32 3, i32 3>
328 ret %shifttypec2i32 %0
; shift4i32c: lshr of <4 x i32> by a constant splat of 3; the %b argument is
; not used by the shift.
; The cost-model check expects a cost of 1; the generated code must contain
; `psrld $3` (shift by an immediate).
331 %shifttypec4i32 = type <4 x i32>
332 define %shifttypec4i32 @shift4i32c(%shifttypec4i32 %a, %shifttypec4i32 %b) {
334 ; SSE2-LABEL: shift4i32c
335 ; SSE2: cost of 1 {{.*}} lshr
336 ; SSE2-CODEGEN-LABEL: shift4i32c
337 ; SSE2-CODEGEN: psrld $3
339 %0 = lshr %shifttypec4i32 %a , <i32 3, i32 3, i32 3, i32 3>
340 ret %shifttypec4i32 %0
; shift8i32c: lshr of <8 x i32> by a constant splat of 3; the %b argument is
; not used by the shift.
; The cost-model check expects a cost of 2, i.e. twice the <4 x i32> constant
; expectation in this file; the generated code must contain `psrld $3`.
343 %shifttypec8i32 = type <8 x i32>
344 define %shifttypec8i32 @shift8i32c(%shifttypec8i32 %a, %shifttypec8i32 %b) {
346 ; SSE2-LABEL: shift8i32c
347 ; SSE2: cost of 2 {{.*}} lshr
348 ; SSE2-CODEGEN-LABEL: shift8i32c
349 ; SSE2-CODEGEN: psrld $3
351 %0 = lshr %shifttypec8i32 %a , <i32 3, i32 3, i32 3, i32 3,
352 i32 3, i32 3, i32 3, i32 3>
353 ret %shifttypec8i32 %0
; shift16i32c: lshr of <16 x i32> by a constant splat of 3; the %b argument
; is not used by the shift.
; The cost-model check expects a cost of 4, i.e. four times the <4 x i32>
; constant expectation in this file; the generated code must contain
; `psrld $3`.
356 %shifttypec16i32 = type <16 x i32>
357 define %shifttypec16i32 @shift16i32c(%shifttypec16i32 %a, %shifttypec16i32 %b) {
359 ; SSE2-LABEL: shift16i32c
360 ; SSE2: cost of 4 {{.*}} lshr
361 ; SSE2-CODEGEN-LABEL: shift16i32c
362 ; SSE2-CODEGEN: psrld $3
364 %0 = lshr %shifttypec16i32 %a , <i32 3, i32 3, i32 3, i32 3,
365 i32 3, i32 3, i32 3, i32 3,
366 i32 3, i32 3, i32 3, i32 3,
367 i32 3, i32 3, i32 3, i32 3>
368 ret %shifttypec16i32 %0
; shift32i32c: lshr of <32 x i32> by a constant splat of 3; the %b argument
; is not used by the shift.
; The cost-model check expects a cost of 8, i.e. eight times the <4 x i32>
; constant expectation in this file; the generated code must contain
; `psrld $3`.
371 %shifttypec32i32 = type <32 x i32>
372 define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
374 ; SSE2-LABEL: shift32i32c
375 ; SSE2: cost of 8 {{.*}} lshr
376 ; SSE2-CODEGEN-LABEL: shift32i32c
377 ; SSE2-CODEGEN: psrld $3
378 %0 = lshr %shifttypec32i32 %a , <i32 3, i32 3, i32 3, i32 3,
379 i32 3, i32 3, i32 3, i32 3,
380 i32 3, i32 3, i32 3, i32 3,
381 i32 3, i32 3, i32 3, i32 3,
382 i32 3, i32 3, i32 3, i32 3,
383 i32 3, i32 3, i32 3, i32 3,
384 i32 3, i32 3, i32 3, i32 3,
385 i32 3, i32 3, i32 3, i32 3>
386 ret %shifttypec32i32 %0
; shift2i64c: lshr of <2 x i64> by the constant splat <3, 3>; the %b argument
; is not used by the shift.
; The cost-model check expects a cost of 1; the generated code must contain
; `psrlq $3` (shift by an immediate).
389 %shifttypec2i64 = type <2 x i64>
390 define %shifttypec2i64 @shift2i64c(%shifttypec2i64 %a, %shifttypec2i64 %b) {
392 ; SSE2-LABEL: shift2i64c
393 ; SSE2: cost of 1 {{.*}} lshr
394 ; SSE2-CODEGEN-LABEL: shift2i64c
395 ; SSE2-CODEGEN: psrlq $3
397 %0 = lshr %shifttypec2i64 %a , <i64 3, i64 3>
398 ret %shifttypec2i64 %0
; shift4i64c: lshr of <4 x i64> by a constant splat of 3; the %b argument is
; not used by the shift.
; The cost-model check expects a cost of 2, i.e. twice the <2 x i64> constant
; expectation in this file; the generated code must contain `psrlq $3`.
401 %shifttypec4i64 = type <4 x i64>
402 define %shifttypec4i64 @shift4i64c(%shifttypec4i64 %a, %shifttypec4i64 %b) {
404 ; SSE2-LABEL: shift4i64c
405 ; SSE2: cost of 2 {{.*}} lshr
406 ; SSE2-CODEGEN-LABEL: shift4i64c
407 ; SSE2-CODEGEN: psrlq $3
409 %0 = lshr %shifttypec4i64 %a , <i64 3, i64 3, i64 3, i64 3>
410 ret %shifttypec4i64 %0
; shift8i64c: lshr of <8 x i64> by a constant splat of 3; the %b argument is
; not used by the shift.
; The cost-model check expects a cost of 4, i.e. four times the <2 x i64>
; constant expectation in this file; the generated code must contain
; `psrlq $3`.
413 %shifttypec8i64 = type <8 x i64>
414 define %shifttypec8i64 @shift8i64c(%shifttypec8i64 %a, %shifttypec8i64 %b) {
416 ; SSE2-LABEL: shift8i64c
417 ; SSE2: cost of 4 {{.*}} lshr
418 ; SSE2-CODEGEN-LABEL: shift8i64c
419 ; SSE2-CODEGEN: psrlq $3
421 %0 = lshr %shifttypec8i64 %a , <i64 3, i64 3, i64 3, i64 3,
422 i64 3, i64 3, i64 3, i64 3>
423 ret %shifttypec8i64 %0
; shift16i64c: lshr of <16 x i64> by a constant splat of 3; the %b argument
; is not used by the shift.
; The cost-model check expects a cost of 8, i.e. eight times the <2 x i64>
; constant expectation in this file; the generated code must contain
; `psrlq $3`.
426 %shifttypec16i64 = type <16 x i64>
427 define %shifttypec16i64 @shift16i64c(%shifttypec16i64 %a, %shifttypec16i64 %b) {
429 ; SSE2-LABEL: shift16i64c
430 ; SSE2: cost of 8 {{.*}} lshr
431 ; SSE2-CODEGEN-LABEL: shift16i64c
432 ; SSE2-CODEGEN: psrlq $3
434 %0 = lshr %shifttypec16i64 %a , <i64 3, i64 3, i64 3, i64 3,
435 i64 3, i64 3, i64 3, i64 3,
436 i64 3, i64 3, i64 3, i64 3,
437 i64 3, i64 3, i64 3, i64 3>
438 ret %shifttypec16i64 %0
; shift32i64c: lshr of <32 x i64> by a constant splat of 3; the %b argument
; is not used by the shift.
; The cost-model check expects a cost of 16, i.e. sixteen times the <2 x i64>
; constant expectation in this file; the generated code must contain
; `psrlq $3`.
441 %shifttypec32i64 = type <32 x i64>
442 define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
444 ; SSE2-LABEL: shift32i64c
445 ; SSE2: cost of 16 {{.*}} lshr
446 ; SSE2-CODEGEN-LABEL: shift32i64c
447 ; SSE2-CODEGEN: psrlq $3
449 %0 = lshr %shifttypec32i64 %a ,<i64 3, i64 3, i64 3, i64 3,
450 i64 3, i64 3, i64 3, i64 3,
451 i64 3, i64 3, i64 3, i64 3,
452 i64 3, i64 3, i64 3, i64 3,
453 i64 3, i64 3, i64 3, i64 3,
454 i64 3, i64 3, i64 3, i64 3,
455 i64 3, i64 3, i64 3, i64 3,
456 i64 3, i64 3, i64 3, i64 3>
457 ret %shifttypec32i64 %0
; shift2i8c: lshr of <2 x i8> by the constant splat <3, 3>; the %b argument
; is not used by the shift.
; The cost-model check expects a cost of 2; the generated code must contain
; `psrlw $3` (shift by an immediate).
460 %shifttypec2i8 = type <2 x i8>
461 define %shifttypec2i8 @shift2i8c(%shifttypec2i8 %a, %shifttypec2i8 %b) {
463 ; SSE2-LABEL: shift2i8c
464 ; SSE2: cost of 2 {{.*}} lshr
465 ; SSE2-CODEGEN-LABEL: shift2i8c
466 ; SSE2-CODEGEN: psrlw $3
468 %0 = lshr %shifttypec2i8 %a , <i8 3, i8 3>
469 ret %shifttypec2i8 %0
; shift4i8c: lshr of <4 x i8> by a constant splat of 3; the %b argument is
; not used by the shift.
; The cost-model check expects a cost of 2; the generated code must contain
; `psrlw $3` (shift by an immediate).
472 %shifttypec4i8 = type <4 x i8>
473 define %shifttypec4i8 @shift4i8c(%shifttypec4i8 %a, %shifttypec4i8 %b) {
475 ; SSE2-LABEL: shift4i8c
476 ; SSE2: cost of 2 {{.*}} lshr
477 ; SSE2-CODEGEN-LABEL: shift4i8c
478 ; SSE2-CODEGEN: psrlw $3
480 %0 = lshr %shifttypec4i8 %a , <i8 3, i8 3, i8 3, i8 3>
481 ret %shifttypec4i8 %0
; shift8i8c: lshr of <8 x i8> by a constant splat of 3; the %b argument is
; not used by the shift.
; The cost-model check expects a cost of 2; the generated code must contain
; `psrlw $3` (shift by an immediate).
484 %shifttypec8i8 = type <8 x i8>
485 define %shifttypec8i8 @shift8i8c(%shifttypec8i8 %a, %shifttypec8i8 %b) {
487 ; SSE2-LABEL: shift8i8c
488 ; SSE2: cost of 2 {{.*}} lshr
489 ; SSE2-CODEGEN-LABEL: shift8i8c
490 ; SSE2-CODEGEN: psrlw $3
492 %0 = lshr %shifttypec8i8 %a , <i8 3, i8 3, i8 3, i8 3,
493 i8 3, i8 3, i8 3, i8 3>
494 ret %shifttypec8i8 %0
; shift16i8c: lshr of <16 x i8> by a constant splat of 3; the %b argument is
; not used by the shift.
; The cost-model check expects a cost of 2; the generated code must contain
; `psrlw $3` (shift by an immediate).
497 %shifttypec16i8 = type <16 x i8>
498 define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) {
500 ; SSE2-LABEL: shift16i8c
501 ; SSE2: cost of 2 {{.*}} lshr
502 ; SSE2-CODEGEN-LABEL: shift16i8c
503 ; SSE2-CODEGEN: psrlw $3
505 %0 = lshr %shifttypec16i8 %a , <i8 3, i8 3, i8 3, i8 3,
506 i8 3, i8 3, i8 3, i8 3,
507 i8 3, i8 3, i8 3, i8 3,
508 i8 3, i8 3, i8 3, i8 3>
509 ret %shifttypec16i8 %0
; shift32i8c: lshr of <32 x i8> by a constant splat of 3; the %b argument is
; not used by the shift.
; The cost-model check expects a cost of 4, i.e. twice the <16 x i8> constant
; expectation in this file; the generated code must contain `psrlw $3`.
512 %shifttypec32i8 = type <32 x i8>
513 define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) {
515 ; SSE2-LABEL: shift32i8c
516 ; SSE2: cost of 4 {{.*}} lshr
517 ; SSE2-CODEGEN-LABEL: shift32i8c
518 ; SSE2-CODEGEN: psrlw $3
520 %0 = lshr %shifttypec32i8 %a , <i8 3, i8 3, i8 3, i8 3,
521 i8 3, i8 3, i8 3, i8 3,
522 i8 3, i8 3, i8 3, i8 3,
523 i8 3, i8 3, i8 3, i8 3,
524 i8 3, i8 3, i8 3, i8 3,
525 i8 3, i8 3, i8 3, i8 3,
526 i8 3, i8 3, i8 3, i8 3,
527 i8 3, i8 3, i8 3, i8 3>
528 ret %shifttypec32i8 %0