1 ; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
2 ; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
; Variable-amount arithmetic shift right on <2 x i16> (%a shifted by %b).
; Expected: the cost-model run reports a cost of 32 for the ashr; in the
; lines visible here the llc run only checks the function label.
; NOTE(review): no `ret` or closing brace is visible for this function in
; this listing -- confirm against the original file.
4 %shifttype = type <2 x i16>
5 define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
7 ; SSE2-LABEL: shift2i16
8 ; SSE2: cost of 32 {{.*}} ashr
9 ; SSE2-CODEGEN-LABEL: shift2i16
12 %0 = ashr %shifttype %a , %b
; Variable-amount arithmetic shift right on <4 x i16> (%a shifted by %b).
; Expected: the cost-model run reports a cost of 32 for the ashr; in the
; lines visible here the llc run only checks the function label.
; NOTE(review): no `ret` or closing brace is visible for this function in
; this listing -- confirm against the original file.
16 %shifttype4i16 = type <4 x i16>
17 define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
19 ; SSE2-LABEL: shift4i16
20 ; SSE2: cost of 32 {{.*}} ashr
21 ; SSE2-CODEGEN-LABEL: shift4i16
24 %0 = ashr %shifttype4i16 %a , %b
; Variable-amount arithmetic shift right on <8 x i16> (%a shifted by %b).
; Expected: the cost-model run reports a cost of 32 for the ashr; in the
; lines visible here the llc run only checks the function label.
; NOTE(review): no `ret` or closing brace is visible for this function in
; this listing -- confirm against the original file.
28 %shifttype8i16 = type <8 x i16>
29 define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) {
31 ; SSE2-LABEL: shift8i16
32 ; SSE2: cost of 32 {{.*}} ashr
33 ; SSE2-CODEGEN-LABEL: shift8i16
36 %0 = ashr %shifttype8i16 %a , %b
; Variable-amount arithmetic shift right on <16 x i16> (%a shifted by %b).
; Expected: the cost-model run reports a cost of 64 for the ashr; in the
; lines visible here the llc run only checks the function label.
40 %shifttype16i16 = type <16 x i16>
41 define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) {
43 ; SSE2-LABEL: shift16i16
44 ; SSE2: cost of 64 {{.*}} ashr
45 ; SSE2-CODEGEN-LABEL: shift16i16
48 %0 = ashr %shifttype16i16 %a , %b
49 ret %shifttype16i16 %0
; Variable-amount arithmetic shift right on <32 x i16> (%a shifted by %b).
; Expected: the cost-model run reports a cost of 128 for the ashr; in the
; lines visible here the llc run only checks the function label.
52 %shifttype32i16 = type <32 x i16>
53 define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) {
55 ; SSE2-LABEL: shift32i16
56 ; SSE2: cost of 128 {{.*}} ashr
57 ; SSE2-CODEGEN-LABEL: shift32i16
60 %0 = ashr %shifttype32i16 %a , %b
61 ret %shifttype32i16 %0
; Variable-amount arithmetic shift right on <2 x i32> (%a shifted by %b).
; Expected: the cost-model run reports a cost of 16 for the ashr; in the
; lines visible here the llc run only checks the function label.
; NOTE(review): no `ret` or closing brace is visible for this function in
; this listing -- confirm against the original file.
64 %shifttype2i32 = type <2 x i32>
65 define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
67 ; SSE2-LABEL: shift2i32
68 ; SSE2: cost of 16 {{.*}} ashr
69 ; SSE2-CODEGEN-LABEL: shift2i32
72 %0 = ashr %shifttype2i32 %a , %b
; Variable-amount arithmetic shift right on <4 x i32> (%a shifted by %b).
; Expected: the cost-model run reports a cost of 16 for the ashr; in the
; lines visible here the llc run only checks the function label.
; NOTE(review): no `ret` or closing brace is visible for this function in
; this listing -- confirm against the original file.
76 %shifttype4i32 = type <4 x i32>
77 define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
79 ; SSE2-LABEL: shift4i32
80 ; SSE2: cost of 16 {{.*}} ashr
81 ; SSE2-CODEGEN-LABEL: shift4i32
84 %0 = ashr %shifttype4i32 %a , %b
; Variable-amount arithmetic shift right on <8 x i32> (%a shifted by %b).
; Expected: the cost-model run reports a cost of 32 for the ashr; in the
; lines visible here the llc run only checks the function label.
; NOTE(review): no `ret` or closing brace is visible for this function in
; this listing -- confirm against the original file.
88 %shifttype8i32 = type <8 x i32>
89 define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
91 ; SSE2-LABEL: shift8i32
92 ; SSE2: cost of 32 {{.*}} ashr
93 ; SSE2-CODEGEN-LABEL: shift8i32
96 %0 = ashr %shifttype8i32 %a , %b
; Variable-amount arithmetic shift right on <16 x i32> (%a shifted by %b).
; Expected: the cost-model run reports a cost of 64 for the ashr, and the
; llc output for this function contains psrad.
100 %shifttype16i32 = type <16 x i32>
101 define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
103 ; SSE2-LABEL: shift16i32
104 ; SSE2: cost of 64 {{.*}} ashr
105 ; SSE2-CODEGEN-LABEL: shift16i32
106 ; SSE2-CODEGEN: psrad
108 %0 = ashr %shifttype16i32 %a , %b
109 ret %shifttype16i32 %0
; Variable-amount arithmetic shift right on <32 x i32> (%a shifted by %b).
; Expected: the cost-model run reports a cost of 128 for the ashr, and the
; llc output for this function contains psrad.
112 %shifttype32i32 = type <32 x i32>
113 define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
115 ; SSE2-LABEL: shift32i32
116 ; SSE2: cost of 128 {{.*}} ashr
117 ; SSE2-CODEGEN-LABEL: shift32i32
118 ; SSE2-CODEGEN: psrad
120 %0 = ashr %shifttype32i32 %a , %b
121 ret %shifttype32i32 %0
; Variable-amount arithmetic shift right on <2 x i64> (%a shifted by %b).
; Expected: the cost-model run reports a cost of 12 for the ashr, and the
; llc output for this function contains psrlq.
124 %shifttype2i64 = type <2 x i64>
125 define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
127 ; SSE2-LABEL: shift2i64
128 ; SSE2: cost of 12 {{.*}} ashr
129 ; SSE2-CODEGEN-LABEL: shift2i64
130 ; SSE2-CODEGEN: psrlq
132 %0 = ashr %shifttype2i64 %a , %b
133 ret %shifttype2i64 %0
; Variable-amount arithmetic shift right on <4 x i64> (%a shifted by %b).
; Expected: the cost-model run reports a cost of 24 for the ashr, and the
; llc output for this function contains psrlq.
136 %shifttype4i64 = type <4 x i64>
137 define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
139 ; SSE2-LABEL: shift4i64
140 ; SSE2: cost of 24 {{.*}} ashr
141 ; SSE2-CODEGEN-LABEL: shift4i64
142 ; SSE2-CODEGEN: psrlq
144 %0 = ashr %shifttype4i64 %a , %b
145 ret %shifttype4i64 %0
; Variable-amount arithmetic shift right on <8 x i64> (%a shifted by %b).
; Expected: the cost-model run reports a cost of 48 for the ashr, and the
; llc output for this function contains psrlq.
148 %shifttype8i64 = type <8 x i64>
149 define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
151 ; SSE2-LABEL: shift8i64
152 ; SSE2: cost of 48 {{.*}} ashr
153 ; SSE2-CODEGEN-LABEL: shift8i64
154 ; SSE2-CODEGEN: psrlq
156 %0 = ashr %shifttype8i64 %a , %b
157 ret %shifttype8i64 %0
; Variable-amount arithmetic shift right on <16 x i64> (%a shifted by %b).
; Expected: the cost-model run reports a cost of 96 for the ashr, and the
; llc output for this function contains psrlq.
160 %shifttype16i64 = type <16 x i64>
161 define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
163 ; SSE2-LABEL: shift16i64
164 ; SSE2: cost of 96 {{.*}} ashr
165 ; SSE2-CODEGEN-LABEL: shift16i64
166 ; SSE2-CODEGEN: psrlq
168 %0 = ashr %shifttype16i64 %a , %b
169 ret %shifttype16i64 %0
; Variable-amount arithmetic shift right on <32 x i64> (%a shifted by %b).
; Expected: the cost-model run reports a cost of 192 for the ashr, and the
; llc output for this function contains psrlq.
172 %shifttype32i64 = type <32 x i64>
173 define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
175 ; SSE2-LABEL: shift32i64
176 ; SSE2: cost of 192 {{.*}} ashr
177 ; SSE2-CODEGEN-LABEL: shift32i64
178 ; SSE2-CODEGEN: psrlq
180 %0 = ashr %shifttype32i64 %a , %b
181 ret %shifttype32i64 %0
; Variable-amount arithmetic shift right on <2 x i8> (%a shifted by %b).
; Expected: the cost-model run reports a cost of 54 for the ashr, and the
; llc output for this function contains psrlw.
; NOTE(review): no `ret` or closing brace is visible for this function in
; this listing -- confirm against the original file.
184 %shifttype2i8 = type <2 x i8>
185 define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
187 ; SSE2-LABEL: shift2i8
188 ; SSE2: cost of 54 {{.*}} ashr
189 ; SSE2-CODEGEN-LABEL: shift2i8
190 ; SSE2-CODEGEN: psrlw
192 %0 = ashr %shifttype2i8 %a , %b
; Variable-amount arithmetic shift right on <4 x i8> (%a shifted by %b).
; Expected: the cost-model run reports a cost of 54 for the ashr, and the
; llc output for this function contains psraw.
; NOTE(review): no `ret` or closing brace is visible for this function in
; this listing -- confirm against the original file.
196 %shifttype4i8 = type <4 x i8>
197 define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
199 ; SSE2-LABEL: shift4i8
200 ; SSE2: cost of 54 {{.*}} ashr
201 ; SSE2-CODEGEN-LABEL: shift4i8
202 ; SSE2-CODEGEN: psraw
204 %0 = ashr %shifttype4i8 %a , %b
; Variable-amount arithmetic shift right on <8 x i8> (%a shifted by %b).
; Expected: the cost-model run reports a cost of 54 for the ashr, and the
; llc output for this function contains psraw.
; NOTE(review): no `ret` or closing brace is visible for this function in
; this listing -- confirm against the original file.
208 %shifttype8i8 = type <8 x i8>
209 define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) {
211 ; SSE2-LABEL: shift8i8
212 ; SSE2: cost of 54 {{.*}} ashr
213 ; SSE2-CODEGEN-LABEL: shift8i8
214 ; SSE2-CODEGEN: psraw
216 %0 = ashr %shifttype8i8 %a , %b
; Variable-amount arithmetic shift right on <16 x i8> (%a shifted by %b).
; Expected: the cost-model run reports a cost of 54 for the ashr, and the
; llc output for this function contains psraw.
220 %shifttype16i8 = type <16 x i8>
221 define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) {
223 ; SSE2-LABEL: shift16i8
224 ; SSE2: cost of 54 {{.*}} ashr
225 ; SSE2-CODEGEN-LABEL: shift16i8
226 ; SSE2-CODEGEN: psraw
228 %0 = ashr %shifttype16i8 %a , %b
229 ret %shifttype16i8 %0
; Variable-amount arithmetic shift right on <32 x i8> (%a shifted by %b).
; Expected: the cost-model run reports a cost of 108 for the ashr, and the
; llc output for this function contains psraw.
232 %shifttype32i8 = type <32 x i8>
233 define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) {
235 ; SSE2-LABEL: shift32i8
236 ; SSE2: cost of 108 {{.*}} ashr
237 ; SSE2-CODEGEN-LABEL: shift32i8
238 ; SSE2-CODEGEN: psraw
240 %0 = ashr %shifttype32i8 %a , %b
241 ret %shifttype32i8 %0
244 ; Test shift by a constant value.
; Arithmetic shift right of <2 x i16> %a by a constant vector whose elements
; are all 3; %b is declared but not used in the visible body.
; Expected: the cost-model run reports a cost of 1 for the ashr, and the
; llc output for this function contains "psraw $3".
; NOTE(review): no `ret` or closing brace is visible for this function in
; this listing -- confirm against the original file.
246 %shifttypec = type <2 x i16>
247 define %shifttypec @shift2i16const(%shifttypec %a, %shifttypec %b) {
249 ; SSE2-LABEL: shift2i16const
250 ; SSE2: cost of 1 {{.*}} ashr
251 ; SSE2-CODEGEN-LABEL: shift2i16const
252 ; SSE2-CODEGEN: psraw $3
254 %0 = ashr %shifttypec %a , <i16 3, i16 3>
; Arithmetic shift right of <4 x i16> %a by a constant vector whose elements
; are all 3; %b is declared but not used in the visible body.
; Expected: the cost-model run reports a cost of 1 for the ashr, and the
; llc output for this function contains "psraw $3".
258 %shifttypec4i16 = type <4 x i16>
259 define %shifttypec4i16 @shift4i16const(%shifttypec4i16 %a, %shifttypec4i16 %b) {
261 ; SSE2-LABEL: shift4i16const
262 ; SSE2: cost of 1 {{.*}} ashr
263 ; SSE2-CODEGEN-LABEL: shift4i16const
264 ; SSE2-CODEGEN: psraw $3
266 %0 = ashr %shifttypec4i16 %a , <i16 3, i16 3, i16 3, i16 3>
267 ret %shifttypec4i16 %0
; Arithmetic shift right of <8 x i16> %a by a constant vector whose elements
; are all 3; %b is declared but not used in the visible body.
; Expected: the cost-model run reports a cost of 1 for the ashr, and the
; llc output for this function contains "psraw $3".
270 %shifttypec8i16 = type <8 x i16>
271 define %shifttypec8i16 @shift8i16const(%shifttypec8i16 %a, %shifttypec8i16 %b) {
273 ; SSE2-LABEL: shift8i16const
274 ; SSE2: cost of 1 {{.*}} ashr
275 ; SSE2-CODEGEN-LABEL: shift8i16const
276 ; SSE2-CODEGEN: psraw $3
278 %0 = ashr %shifttypec8i16 %a , <i16 3, i16 3, i16 3, i16 3,
279 i16 3, i16 3, i16 3, i16 3>
280 ret %shifttypec8i16 %0
; Arithmetic shift right of <16 x i16> %a by a constant vector whose elements
; are all 3; %b is declared but not used in the visible body.
; Expected: the cost-model run reports a cost of 2 for the ashr, and the
; llc output for this function contains "psraw $3".
283 %shifttypec16i16 = type <16 x i16>
284 define %shifttypec16i16 @shift16i16const(%shifttypec16i16 %a,
285 %shifttypec16i16 %b) {
287 ; SSE2-LABEL: shift16i16const
288 ; SSE2: cost of 2 {{.*}} ashr
289 ; SSE2-CODEGEN-LABEL: shift16i16const
290 ; SSE2-CODEGEN: psraw $3
292 %0 = ashr %shifttypec16i16 %a , <i16 3, i16 3, i16 3, i16 3,
293 i16 3, i16 3, i16 3, i16 3,
294 i16 3, i16 3, i16 3, i16 3,
295 i16 3, i16 3, i16 3, i16 3>
296 ret %shifttypec16i16 %0
; Arithmetic shift right of <32 x i16> %a by a constant vector whose elements
; are all 3; %b is declared but not used in the visible body.
; Expected: the cost-model run reports a cost of 4 for the ashr, and the
; llc output for this function contains "psraw $3".
299 %shifttypec32i16 = type <32 x i16>
300 define %shifttypec32i16 @shift32i16const(%shifttypec32i16 %a,
301 %shifttypec32i16 %b) {
303 ; SSE2-LABEL: shift32i16const
304 ; SSE2: cost of 4 {{.*}} ashr
305 ; SSE2-CODEGEN-LABEL: shift32i16const
306 ; SSE2-CODEGEN: psraw $3
308 %0 = ashr %shifttypec32i16 %a , <i16 3, i16 3, i16 3, i16 3,
309 i16 3, i16 3, i16 3, i16 3,
310 i16 3, i16 3, i16 3, i16 3,
311 i16 3, i16 3, i16 3, i16 3,
312 i16 3, i16 3, i16 3, i16 3,
313 i16 3, i16 3, i16 3, i16 3,
314 i16 3, i16 3, i16 3, i16 3,
315 i16 3, i16 3, i16 3, i16 3>
316 ret %shifttypec32i16 %0
; Arithmetic shift right of <2 x i32> %a by a constant vector whose elements
; are all 3; %b is declared but not used in the visible body.
; Expected: the cost-model run reports a cost of 1 for the ashr, and the
; llc output for this function contains "psrad $3".
319 %shifttypec2i32 = type <2 x i32>
320 define %shifttypec2i32 @shift2i32c(%shifttypec2i32 %a, %shifttypec2i32 %b) {
322 ; SSE2-LABEL: shift2i32c
323 ; SSE2: cost of 1 {{.*}} ashr
324 ; SSE2-CODEGEN-LABEL: shift2i32c
325 ; SSE2-CODEGEN: psrad $3
327 %0 = ashr %shifttypec2i32 %a , <i32 3, i32 3>
328 ret %shifttypec2i32 %0
; Arithmetic shift right of <4 x i32> %a by a constant vector whose elements
; are all 3; %b is declared but not used in the visible body.
; Expected: the cost-model run reports a cost of 1 for the ashr, and the
; llc output for this function contains "psrad $3".
331 %shifttypec4i32 = type <4 x i32>
332 define %shifttypec4i32 @shift4i32c(%shifttypec4i32 %a, %shifttypec4i32 %b) {
334 ; SSE2-LABEL: shift4i32c
335 ; SSE2: cost of 1 {{.*}} ashr
336 ; SSE2-CODEGEN-LABEL: shift4i32c
337 ; SSE2-CODEGEN: psrad $3
339 %0 = ashr %shifttypec4i32 %a , <i32 3, i32 3, i32 3, i32 3>
340 ret %shifttypec4i32 %0
; Arithmetic shift right of <8 x i32> %a by a constant vector whose elements
; are all 3; %b is declared but not used in the visible body.
; Expected: the cost-model run reports a cost of 2 for the ashr, and the
; llc output for this function contains "psrad $3".
343 %shifttypec8i32 = type <8 x i32>
344 define %shifttypec8i32 @shift8i32c(%shifttypec8i32 %a, %shifttypec8i32 %b) {
346 ; SSE2-LABEL: shift8i32c
347 ; SSE2: cost of 2 {{.*}} ashr
348 ; SSE2-CODEGEN-LABEL: shift8i32c
349 ; SSE2-CODEGEN: psrad $3
351 %0 = ashr %shifttypec8i32 %a , <i32 3, i32 3, i32 3, i32 3,
352 i32 3, i32 3, i32 3, i32 3>
353 ret %shifttypec8i32 %0
; Arithmetic shift right of <16 x i32> %a by a constant vector whose elements
; are all 3; %b is declared but not used in the visible body.
; Expected: the cost-model run reports a cost of 4 for the ashr, and the
; llc output for this function contains "psrad $3".
356 %shifttypec16i32 = type <16 x i32>
357 define %shifttypec16i32 @shift16i32c(%shifttypec16i32 %a, %shifttypec16i32 %b) {
359 ; SSE2-LABEL: shift16i32c
360 ; SSE2: cost of 4 {{.*}} ashr
361 ; SSE2-CODEGEN-LABEL: shift16i32c
362 ; SSE2-CODEGEN: psrad $3
364 %0 = ashr %shifttypec16i32 %a , <i32 3, i32 3, i32 3, i32 3,
365 i32 3, i32 3, i32 3, i32 3,
366 i32 3, i32 3, i32 3, i32 3,
367 i32 3, i32 3, i32 3, i32 3>
368 ret %shifttypec16i32 %0
; Arithmetic shift right of <32 x i32> %a by a constant vector whose elements
; are all 3; %b is declared but not used in the visible body.
; Expected: the cost-model run reports a cost of 8 for the ashr, and the
; llc output for this function contains "psrad $3".
371 %shifttypec32i32 = type <32 x i32>
372 define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
374 ; SSE2-LABEL: shift32i32c
375 ; getTypeConversion fails here and promotes this to an i64.
376 ; SSE2: cost of 8 {{.*}} ashr
377 ; SSE2-CODEGEN-LABEL: shift32i32c
378 ; SSE2-CODEGEN: psrad $3
379 %0 = ashr %shifttypec32i32 %a , <i32 3, i32 3, i32 3, i32 3,
380 i32 3, i32 3, i32 3, i32 3,
381 i32 3, i32 3, i32 3, i32 3,
382 i32 3, i32 3, i32 3, i32 3,
383 i32 3, i32 3, i32 3, i32 3,
384 i32 3, i32 3, i32 3, i32 3,
385 i32 3, i32 3, i32 3, i32 3,
386 i32 3, i32 3, i32 3, i32 3>
387 ret %shifttypec32i32 %0
; Arithmetic shift right of <2 x i64> %a by a constant vector whose elements
; are all 3; %b is declared but not used in the visible body.
; Expected: the cost-model run reports a cost of 4 for the ashr, and the
; llc output for this function contains "psrad $3".
390 %shifttypec2i64 = type <2 x i64>
391 define %shifttypec2i64 @shift2i64c(%shifttypec2i64 %a, %shifttypec2i64 %b) {
393 ; SSE2-LABEL: shift2i64c
394 ; SSE2: cost of 4 {{.*}} ashr
395 ; SSE2-CODEGEN-LABEL: shift2i64c
396 ; SSE2-CODEGEN: psrad $3
398 %0 = ashr %shifttypec2i64 %a , <i64 3, i64 3>
399 ret %shifttypec2i64 %0
; Arithmetic shift right of <4 x i64> %a by a constant vector whose elements
; are all 3; %b is declared but not used in the visible body.
; Expected: the cost-model run reports a cost of 8 for the ashr, and the
; llc output for this function contains "psrad $3".
402 %shifttypec4i64 = type <4 x i64>
403 define %shifttypec4i64 @shift4i64c(%shifttypec4i64 %a, %shifttypec4i64 %b) {
405 ; SSE2-LABEL: shift4i64c
406 ; SSE2: cost of 8 {{.*}} ashr
407 ; SSE2-CODEGEN-LABEL: shift4i64c
408 ; SSE2-CODEGEN: psrad $3
410 %0 = ashr %shifttypec4i64 %a , <i64 3, i64 3, i64 3, i64 3>
411 ret %shifttypec4i64 %0
; Arithmetic shift right of <8 x i64> %a by a constant vector whose elements
; are all 3; %b is declared but not used in the visible body.
; Expected: the cost-model run reports a cost of 16 for the ashr, and the
; llc output for this function contains "psrad $3".
414 %shifttypec8i64 = type <8 x i64>
415 define %shifttypec8i64 @shift8i64c(%shifttypec8i64 %a, %shifttypec8i64 %b) {
417 ; SSE2-LABEL: shift8i64c
418 ; SSE2: cost of 16 {{.*}} ashr
419 ; SSE2-CODEGEN-LABEL: shift8i64c
420 ; SSE2-CODEGEN: psrad $3
422 %0 = ashr %shifttypec8i64 %a , <i64 3, i64 3, i64 3, i64 3,
423 i64 3, i64 3, i64 3, i64 3>
424 ret %shifttypec8i64 %0
; Arithmetic shift right of <16 x i64> %a by a constant vector whose elements
; are all 3; %b is declared but not used in the visible body.
; Expected: the cost-model run reports a cost of 32 for the ashr, and the
; llc output for this function contains "psrad $3".
427 %shifttypec16i64 = type <16 x i64>
428 define %shifttypec16i64 @shift16i64c(%shifttypec16i64 %a, %shifttypec16i64 %b) {
430 ; SSE2-LABEL: shift16i64c
431 ; SSE2: cost of 32 {{.*}} ashr
432 ; SSE2-CODEGEN-LABEL: shift16i64c
433 ; SSE2-CODEGEN: psrad $3
435 %0 = ashr %shifttypec16i64 %a , <i64 3, i64 3, i64 3, i64 3,
436 i64 3, i64 3, i64 3, i64 3,
437 i64 3, i64 3, i64 3, i64 3,
438 i64 3, i64 3, i64 3, i64 3>
439 ret %shifttypec16i64 %0
; Arithmetic shift right of <32 x i64> %a by a constant vector whose elements
; are all 3; %b is declared but not used in the visible body.
; Expected: the cost-model run reports a cost of 64 for the ashr, and the
; llc output for this function contains "psrad $3".
442 %shifttypec32i64 = type <32 x i64>
443 define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
445 ; SSE2-LABEL: shift32i64c
446 ; SSE2: cost of 64 {{.*}} ashr
447 ; SSE2-CODEGEN-LABEL: shift32i64c
448 ; SSE2-CODEGEN: psrad $3
450 %0 = ashr %shifttypec32i64 %a ,<i64 3, i64 3, i64 3, i64 3,
451 i64 3, i64 3, i64 3, i64 3,
452 i64 3, i64 3, i64 3, i64 3,
453 i64 3, i64 3, i64 3, i64 3,
454 i64 3, i64 3, i64 3, i64 3,
455 i64 3, i64 3, i64 3, i64 3,
456 i64 3, i64 3, i64 3, i64 3,
457 i64 3, i64 3, i64 3, i64 3>
458 ret %shifttypec32i64 %0
; Arithmetic shift right of <2 x i8> %a by a constant vector whose elements
; are all 3; %b is declared but not used in the visible body.
; Expected: the cost-model run reports a cost of 4 for the ashr, and the
; llc output for this function contains "psrlw $3".
461 %shifttypec2i8 = type <2 x i8>
462 define %shifttypec2i8 @shift2i8c(%shifttypec2i8 %a, %shifttypec2i8 %b) {
464 ; SSE2-LABEL: shift2i8c
465 ; SSE2: cost of 4 {{.*}} ashr
466 ; SSE2-CODEGEN-LABEL: shift2i8c
467 ; SSE2-CODEGEN: psrlw $3
469 %0 = ashr %shifttypec2i8 %a , <i8 3, i8 3>
470 ret %shifttypec2i8 %0
; Arithmetic shift right of <4 x i8> %a by a constant vector whose elements
; are all 3; %b is declared but not used in the visible body.
; Expected: the cost-model run reports a cost of 4 for the ashr, and the
; llc output for this function contains "psrlw $3".
473 %shifttypec4i8 = type <4 x i8>
474 define %shifttypec4i8 @shift4i8c(%shifttypec4i8 %a, %shifttypec4i8 %b) {
476 ; SSE2-LABEL: shift4i8c
477 ; SSE2: cost of 4 {{.*}} ashr
478 ; SSE2-CODEGEN-LABEL: shift4i8c
479 ; SSE2-CODEGEN: psrlw $3
481 %0 = ashr %shifttypec4i8 %a , <i8 3, i8 3, i8 3, i8 3>
482 ret %shifttypec4i8 %0
; Arithmetic shift right of <8 x i8> %a by a constant vector whose elements
; are all 3; %b is declared but not used in the visible body.
; Expected: the cost-model run reports a cost of 4 for the ashr, and the
; llc output for this function contains "psrlw $3".
485 %shifttypec8i8 = type <8 x i8>
486 define %shifttypec8i8 @shift8i8c(%shifttypec8i8 %a, %shifttypec8i8 %b) {
488 ; SSE2-LABEL: shift8i8c
489 ; SSE2: cost of 4 {{.*}} ashr
490 ; SSE2-CODEGEN-LABEL: shift8i8c
491 ; SSE2-CODEGEN: psrlw $3
493 %0 = ashr %shifttypec8i8 %a , <i8 3, i8 3, i8 3, i8 3,
494 i8 3, i8 3, i8 3, i8 3>
495 ret %shifttypec8i8 %0
; Arithmetic shift right of <16 x i8> %a by a constant vector whose elements
; are all 3; %b is declared but not used in the visible body.
; Expected: the cost-model run reports a cost of 4 for the ashr, and the
; llc output for this function contains "psrlw $3".
498 %shifttypec16i8 = type <16 x i8>
499 define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) {
501 ; SSE2-LABEL: shift16i8c
502 ; SSE2: cost of 4 {{.*}} ashr
503 ; SSE2-CODEGEN-LABEL: shift16i8c
504 ; SSE2-CODEGEN: psrlw $3
506 %0 = ashr %shifttypec16i8 %a , <i8 3, i8 3, i8 3, i8 3,
507 i8 3, i8 3, i8 3, i8 3,
508 i8 3, i8 3, i8 3, i8 3,
509 i8 3, i8 3, i8 3, i8 3>
510 ret %shifttypec16i8 %0
; Arithmetic shift right of <32 x i8> %a by a constant vector whose elements
; are all 3; %b is declared but not used in the visible body.
; Expected: the cost-model run reports a cost of 8 for the ashr, and the
; llc output for this function contains "psrlw $3".
513 %shifttypec32i8 = type <32 x i8>
514 define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) {
516 ; SSE2-LABEL: shift32i8c
517 ; SSE2: cost of 8 {{.*}} ashr
518 ; SSE2-CODEGEN-LABEL: shift32i8c
519 ; SSE2-CODEGEN: psrlw $3
521 %0 = ashr %shifttypec32i8 %a , <i8 3, i8 3, i8 3, i8 3,
522 i8 3, i8 3, i8 3, i8 3,
523 i8 3, i8 3, i8 3, i8 3,
524 i8 3, i8 3, i8 3, i8 3,
525 i8 3, i8 3, i8 3, i8 3,
526 i8 3, i8 3, i8 3, i8 3,
527 i8 3, i8 3, i8 3, i8 3,
528 i8 3, i8 3, i8 3, i8 3>
529 ret %shifttypec32i8 %0