1 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=corei7 | FileCheck %s
; intrin_pmov: loads a 16-byte-aligned <2 x i64> from %src, reinterprets it as
; <16 x i8>, passes it to the SSE4.1 pmovzxbw intrinsic, and writes the result to
; %dest through the SSE2 storeu.dq intrinsic.
; The FileCheck pattern below expects pmovzxbw to take a memory operand as its
; source and %xmm0 as its destination.
5 ;CHECK-LABEL: intrin_pmov:
6 ;CHECK: pmovzxbw (%{{.*}}), %xmm0
9 define void @intrin_pmov(ptr noalias %dest, ptr noalias %src) nounwind uwtable ssp {
; Aligned 128-bit load from %src.
10 %1 = load <2 x i64>, ptr %src, align 16
; Reinterpret the loaded bits as sixteen i8 lanes, the operand type the intrinsic takes.
11 %2 = bitcast <2 x i64> %1 to <16 x i8>
; The intrinsic consumes <16 x i8> and produces <8 x i16>.
12 %3 = tail call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %2) nounwind
; Reinterpret the result as <16 x i8>, the value type storeu.dq is declared with.
13 %4 = bitcast <8 x i16> %3 to <16 x i8>
; Write the 128-bit result to %dest via the storeu.dq intrinsic.
14 tail call void @llvm.x86.sse2.storeu.dq(ptr %dest, <16 x i8> %4) nounwind
; x86 intrinsic declarations used by the tests in this file.
; pmovzxbw maps <16 x i8> to <8 x i16> and is marked readnone; it is called by
; intrin_pmov and foo1.
18 declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
; storeu.dq takes a destination pointer and a <16 x i8> value; it is called by
; intrin_pmov.
19 declare void @llvm.x86.sse2.storeu.dq(ptr, <16 x i8>) nounwind
; foo0: reinterprets the double argument as <4 x i16>, widens it to <8 x i16>
; with undef upper lanes, and passes it to the SSE4.1 pmovzxwd intrinsic.
; The FileCheck pattern inside the body expects a register-to-register pmovzxwd
; on %xmm0.
23 define <4 x i32> @foo0(double %v.coerce) nounwind ssp {
25 ; CHECK: pmovzxwd %xmm0, %xmm0
; View the 64 bits of the double as four i16 lanes.
27 %tmp = bitcast double %v.coerce to <4 x i16>
; Widen to eight lanes: lanes 0-3 are taken from %tmp, lanes 4-7 are undef.
28 %tmp1 = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; The intrinsic consumes <8 x i16> and produces <4 x i32>.
29 %tmp2 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp1) nounwind
; foo1: reinterprets the double argument as <8 x i8>, widens it to <16 x i8>
; with undef upper lanes, and passes it to the SSE4.1 pmovzxbw intrinsic.
; The FileCheck pattern inside the body expects a register-to-register pmovzxbw
; on %xmm0.
33 define <8 x i16> @foo1(double %v.coerce) nounwind ssp {
35 ; CHECK: pmovzxbw %xmm0, %xmm0
; View the 64 bits of the double as eight i8 lanes.
37 %tmp = bitcast double %v.coerce to <8 x i8>
; Widen to sixteen lanes: lanes 0-7 are taken from %tmp, lanes 8-15 are undef.
38 %tmp1 = shufflevector <8 x i8> %tmp, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; The intrinsic consumes <16 x i8> and produces <8 x i16>.
39 %tmp2 = tail call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %tmp1)
; SSE4.1 pmovzxwd intrinsic: maps <8 x i16> to <4 x i32>, marked readnone.
; It is called by foo0.
43 declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone