1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse2 -verify-machineinstrs | FileCheck %s
4 ; After tail duplication, two copies in an early exit BB can be cancelled out.
; @t1 is a signed-remainder loop: every iteration replaces the pair
; (dividend, divisor) with (divisor, dividend srem divisor) and leaves the loop
; once the remainder is zero. It yields %a unchanged when %b is zero on entry,
; otherwise the divisor used by the last srem.
6 define i32 @t1(i32 %a, i32 %b) nounwind {
8 ; CHECK: ## %bb.0: ## %entry
9 ; CHECK-NEXT: movl %edi, %eax
10 ; CHECK-NEXT: testl %esi, %esi
11 ; CHECK-NEXT: je LBB0_4
12 ; CHECK-NEXT: ## %bb.1: ## %while.body.preheader
13 ; CHECK-NEXT: movl %esi, %edx
14 ; CHECK-NEXT: .p2align 4
15 ; CHECK-NEXT: LBB0_2: ## %while.body
16 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
17 ; CHECK-NEXT: movl %edx, %ecx
19 ; CHECK-NEXT: idivl %ecx
20 ; CHECK-NEXT: testl %edx, %edx
21 ; CHECK-NEXT: movl %ecx, %eax
22 ; CHECK-NEXT: jne LBB0_2
23 ; CHECK-NEXT: ## %bb.3: ## %while.end
24 ; CHECK-NEXT: movl %ecx, %eax
; Early exit: a zero divisor skips the loop and goes straight to while.end.
28 %cmp1 = icmp eq i32 %b, 0
29 br i1 %cmp1, label %while.end, label %while.body
31 while.body: ; preds = %entry, %while.body
; The previous divisor becomes the dividend; the previous remainder becomes
; the divisor.
32 %a.addr.03 = phi i32 [ %b.addr.02, %while.body ], [ %a, %entry ]
33 %b.addr.02 = phi i32 [ %rem, %while.body ], [ %b, %entry ]
34 %rem = srem i32 %a.addr.03, %b.addr.02
35 %cmp = icmp eq i32 %rem, 0
36 br i1 %cmp, label %while.end, label %while.body
38 while.end: ; preds = %while.body, %entry
; Result is %a on the early-exit edge, or the last divisor on the loop edge.
39 %a.addr.0.lcssa = phi i32 [ %a, %entry ], [ %b.addr.02, %while.body ]
40 ret i32 %a.addr.0.lcssa
43 ; Two movdqa (from phi-elimination) in the entry BB cancel out.
; The three shuffle instructions checked below operate in place on xmm0/xmm1.
45 define <8 x i16> @t2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
47 ; CHECK: ## %bb.0: ## %entry
48 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
49 ; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7]
50 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; The mask selects %T0 elements 7 and 2 and %T1 element 0 (mask index 8);
; every other result lane is undef.
53 %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
; 64-bit variant of @t1: the same signed-remainder loop on i64 operands, with
; the loop result truncated to i32 (%t) to match the i32 return type.
57 define i32 @t3(i64 %a, i64 %b) nounwind {
59 ; CHECK: ## %bb.0: ## %entry
60 ; CHECK-NEXT: movq %rdi, %rax
61 ; CHECK-NEXT: testq %rsi, %rsi
62 ; CHECK-NEXT: je LBB2_4
63 ; CHECK-NEXT: ## %bb.1: ## %while.body.preheader
64 ; CHECK-NEXT: movq %rsi, %rdx
65 ; CHECK-NEXT: .p2align 4
66 ; CHECK-NEXT: LBB2_2: ## %while.body
67 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
68 ; CHECK-NEXT: movq %rdx, %rcx
70 ; CHECK-NEXT: idivq %rcx
71 ; CHECK-NEXT: testq %rdx, %rdx
72 ; CHECK-NEXT: movq %rcx, %rax
73 ; CHECK-NEXT: jne LBB2_2
74 ; CHECK-NEXT: ## %bb.3: ## %while.end
75 ; CHECK-NEXT: movl %ecx, %eax
; Early exit: a zero divisor skips the loop and goes straight to while.end.
79 %cmp1 = icmp eq i64 %b, 0
80 br i1 %cmp1, label %while.end, label %while.body
82 while.body: ; preds = %entry, %while.body
; The previous divisor becomes the dividend; the previous remainder becomes
; the divisor.
83 %a.addr.03 = phi i64 [ %b.addr.02, %while.body ], [ %a, %entry ]
84 %b.addr.02 = phi i64 [ %rem, %while.body ], [ %b, %entry ]
85 %rem = srem i64 %a.addr.03, %b.addr.02
86 %cmp = icmp eq i64 %rem, 0
87 br i1 %cmp, label %while.end, label %while.body
89 while.end: ; preds = %while.body, %entry
; Result is %a on the early-exit edge, or the last divisor on the loop edge;
; only its low 32 bits are kept.
90 %a.addr.0.lcssa = phi i64 [ %a, %entry ], [ %b.addr.02, %while.body ]
91 %t = trunc i64 %a.addr.0.lcssa to i32
95 ; Check that copy propagation does not kill things like:
96 ; dst = copy src <-- do not kill that.
98 ; ... = op2 dst <-- this is used here.
; In the expected output the <16 x float> argument is processed as four
; 128-bit pieces (xmm0-xmm3); each piece is compared against zero, converted
; float -> int -> float, and masked with its own four-element slice of the
; constant <1, 2, ..., 16>.
99 define <16 x float> @foo(<16 x float> %x) {
101 ; CHECK: ## %bb.0: ## %bb
102 ; CHECK-NEXT: xorps %xmm5, %xmm5
103 ; CHECK-NEXT: cvttps2dq %xmm3, %xmm8
104 ; CHECK-NEXT: movaps %xmm3, %xmm4
105 ; CHECK-NEXT: cmpltps %xmm5, %xmm4
106 ; CHECK-NEXT: movaps {{.*#+}} xmm7 = [13,14,15,16]
107 ; CHECK-NEXT: movaps %xmm4, %xmm6
108 ; CHECK-NEXT: orps %xmm7, %xmm6
109 ; CHECK-NEXT: cvtdq2ps %xmm8, %xmm3
110 ; CHECK-NEXT: andps %xmm7, %xmm3
111 ; CHECK-NEXT: andps %xmm6, %xmm3
112 ; CHECK-NEXT: andnps %xmm4, %xmm6
113 ; CHECK-NEXT: cvttps2dq %xmm2, %xmm4
114 ; CHECK-NEXT: movaps %xmm2, %xmm7
115 ; CHECK-NEXT: cmpltps %xmm5, %xmm7
116 ; CHECK-NEXT: movaps {{.*#+}} xmm8 = [9,10,11,12]
117 ; CHECK-NEXT: movaps %xmm7, %xmm9
118 ; CHECK-NEXT: orps %xmm8, %xmm9
119 ; CHECK-NEXT: cvtdq2ps %xmm4, %xmm2
120 ; CHECK-NEXT: andps %xmm8, %xmm2
121 ; CHECK-NEXT: andps %xmm9, %xmm2
122 ; CHECK-NEXT: andnps %xmm7, %xmm9
123 ; CHECK-NEXT: cvttps2dq %xmm1, %xmm4
124 ; CHECK-NEXT: cmpltps %xmm5, %xmm1
125 ; CHECK-NEXT: movaps {{.*#+}} xmm7 = [5,6,7,8]
126 ; CHECK-NEXT: movaps %xmm1, %xmm8
127 ; CHECK-NEXT: orps %xmm7, %xmm8
128 ; CHECK-NEXT: cvtdq2ps %xmm4, %xmm4
129 ; CHECK-NEXT: andps %xmm7, %xmm4
130 ; CHECK-NEXT: andps %xmm8, %xmm4
131 ; CHECK-NEXT: andnps %xmm1, %xmm8
132 ; CHECK-NEXT: cvttps2dq %xmm0, %xmm1
133 ; CHECK-NEXT: cmpltps %xmm5, %xmm0
134 ; CHECK-NEXT: movaps {{.*#+}} xmm5 = [1,2,3,4]
135 ; CHECK-NEXT: movaps %xmm0, %xmm7
136 ; CHECK-NEXT: orps %xmm5, %xmm7
137 ; CHECK-NEXT: cvtdq2ps %xmm1, %xmm1
138 ; CHECK-NEXT: andps %xmm5, %xmm1
139 ; CHECK-NEXT: andps %xmm7, %xmm1
140 ; CHECK-NEXT: andnps %xmm0, %xmm7
141 ; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
142 ; CHECK-NEXT: andps %xmm0, %xmm7
143 ; CHECK-NEXT: orps %xmm7, %xmm1
144 ; CHECK-NEXT: andps %xmm0, %xmm8
145 ; CHECK-NEXT: orps %xmm8, %xmm4
146 ; CHECK-NEXT: andps %xmm0, %xmm9
147 ; CHECK-NEXT: orps %xmm9, %xmm2
148 ; CHECK-NEXT: andps %xmm0, %xmm6
149 ; CHECK-NEXT: orps %xmm6, %xmm3
150 ; CHECK-NEXT: movaps %xmm1, %xmm0
151 ; CHECK-NEXT: movaps %xmm4, %xmm1
; Constant compare: no element of <0, ..., 15> is negative, so %v3 is
; all-false and %v14 is all-zero.
154 %v3 = icmp slt <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, zeroinitializer
155 %v14 = zext <16 x i1> %v3 to <16 x i32>
; Per-lane "x < 0.0" mask, materialised both as all-ones (%v17, sext) and as
; 0/1 (%v18, zext).
156 %v16 = fcmp olt <16 x float> %x, zeroinitializer
157 %v17 = sext <16 x i1> %v16 to <16 x i32>
158 %v18 = zext <16 x i1> %v16 to <16 x i32>
159 %v19 = xor <16 x i32> %v14, %v18
160 %v20 = or <16 x i32> %v17, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
; Round-trip %x through a signed integer conversion.
161 %v21 = fptosi <16 x float> %x to <16 x i32>
162 %v22 = sitofp <16 x i32> %v21 to <16 x float>
163 %v69 = fcmp ogt <16 x float> %v22, zeroinitializer
164 %v75 = and <16 x i1> %v69, %v3
165 %v77 = bitcast <16 x float> %v22 to <16 x i32>
166 %v79 = sext <16 x i1> %v75 to <16 x i32>
167 %v80 = and <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, %v79
168 %v81 = xor <16 x i32> %v77, %v80
169 %v82 = and <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, %v81
; Bitwise blend: take bits of %v82 where %v20 is set, bits of %v19 elsewhere.
170 %v83 = xor <16 x i32> %v19, %v82
171 %v84 = and <16 x i32> %v83, %v20
172 %v85 = xor <16 x i32> %v19, %v84
173 %v86 = bitcast <16 x i32> %v85 to <16 x float>
174 ret <16 x float> %v86