1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-- | FileCheck %s --implicit-check-not '{{and|movz|sar|shl}}'
4 ; Optimize away zext-inreg and sext-inreg on the loop induction
5 ; variable using trip-count information.
; count_up: the induction variable starts at 0, steps by +1, and the loop is
; left once the incremented value equals 10. Each pass updates three elements
; of %d in place (scaling by 0.1, 2.3 and 4.5). The three indices are the
; induction variable masked with 255, masked with 16777215, and unmasked.
; The expected assembly below has one load, three multiplies and one store per
; pass, and no masking instruction. %n is not referenced in the lines shown.
7 define void @count_up(double* %d, i64 %n) nounwind {
8 ; CHECK-LABEL: count_up:
9 ; CHECK: # %bb.0: # %entry
10 ; CHECK-NEXT: movq $-80, %rax
11 ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
12 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
13 ; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
14 ; CHECK-NEXT: .p2align 4, 0x90
15 ; CHECK-NEXT: .LBB0_1: # %loop
16 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
17 ; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
18 ; CHECK-NEXT: mulsd %xmm0, %xmm3
19 ; CHECK-NEXT: mulsd %xmm1, %xmm3
20 ; CHECK-NEXT: mulsd %xmm2, %xmm3
21 ; CHECK-NEXT: movsd %xmm3, 80(%rdi,%rax)
22 ; CHECK-NEXT: addq $8, %rax
23 ; CHECK-NEXT: jne .LBB0_1
24 ; CHECK-NEXT: # %bb.2: # %return
; Induction variable, counting up from 0.
30 %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
; Access 1: index keeps only the low 8 bits of the induction variable.
31 %indvar.i8 = and i64 %indvar, 255
32 %t0 = getelementptr double, double* %d, i64 %indvar.i8
33 %t1 = load double, double* %t0
34 %t2 = fmul double %t1, 0.1
35 store double %t2, double* %t0
; Access 2: index keeps only the low 24 bits (16777215 = 2^24 - 1).
36 %indvar.i24 = and i64 %indvar, 16777215
37 %t3 = getelementptr double, double* %d, i64 %indvar.i24
38 %t4 = load double, double* %t3
39 %t5 = fmul double %t4, 2.3
40 store double %t5, double* %t3
; Access 3: index is the unmodified induction variable.
41 %t6 = getelementptr double, double* %d, i64 %indvar
42 %t7 = load double, double* %t6
43 %t8 = fmul double %t7, 4.5
44 store double %t8, double* %t6
; Step by +1 and exit when the next value is 10, so the index never exceeds 9
; and both masks above leave it unchanged.
45 %indvar.next = add i64 %indvar, 1
46 %exitcond = icmp eq i64 %indvar.next, 10
47 br i1 %exitcond, label %return, label %loop
; count_down: the induction variable starts at 10, steps by -1, and the loop is
; left once the decremented value equals 0. Each pass updates three elements
; of %d in place (scaling by 0.1, 2.3 and 4.5). The three indices are the
; induction variable masked with 255, masked with 16777215, and unmasked.
; The expected assembly below has one load, three multiplies and one store per
; pass, and no masking instruction. %n is not referenced in the lines shown.
53 define void @count_down(double* %d, i64 %n) nounwind {
54 ; CHECK-LABEL: count_down:
55 ; CHECK: # %bb.0: # %entry
56 ; CHECK-NEXT: movl $80, %eax
57 ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
58 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
59 ; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
60 ; CHECK-NEXT: .p2align 4, 0x90
61 ; CHECK-NEXT: .LBB1_1: # %loop
62 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
63 ; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
64 ; CHECK-NEXT: mulsd %xmm0, %xmm3
65 ; CHECK-NEXT: mulsd %xmm1, %xmm3
66 ; CHECK-NEXT: mulsd %xmm2, %xmm3
67 ; CHECK-NEXT: movsd %xmm3, (%rdi,%rax)
68 ; CHECK-NEXT: addq $-8, %rax
69 ; CHECK-NEXT: jne .LBB1_1
70 ; CHECK-NEXT: # %bb.2: # %return
; Induction variable, counting down from 10.
76 %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
; Access 1: index keeps only the low 8 bits of the induction variable.
77 %indvar.i8 = and i64 %indvar, 255
78 %t0 = getelementptr double, double* %d, i64 %indvar.i8
79 %t1 = load double, double* %t0
80 %t2 = fmul double %t1, 0.1
81 store double %t2, double* %t0
; Access 2: index keeps only the low 24 bits (16777215 = 2^24 - 1).
82 %indvar.i24 = and i64 %indvar, 16777215
83 %t3 = getelementptr double, double* %d, i64 %indvar.i24
84 %t4 = load double, double* %t3
85 %t5 = fmul double %t4, 2.3
86 store double %t5, double* %t3
; Access 3: index is the unmodified induction variable.
87 %t6 = getelementptr double, double* %d, i64 %indvar
88 %t7 = load double, double* %t6
89 %t8 = fmul double %t7, 4.5
90 store double %t8, double* %t6
; Step by -1 and exit when the next value is 0, so the index stays within
; 10 down to 1 and both masks above leave it unchanged.
91 %indvar.next = sub i64 %indvar, 1
92 %exitcond = icmp eq i64 %indvar.next, 0
93 br i1 %exitcond, label %return, label %loop
; count_up_signed: same loop shape as an upward count from 0 with exit at 10,
; but the first two indices are formed with a shift-left / arithmetic-shift-right
; pair (an in-register sign extension) instead of a mask.
; Note that on i64 a shift pair by 8 sign-extends from the low 56 bits and a
; pair by 24 from the low 40 bits, despite the .i8 / .i24 value names.
; The expected assembly below has one load, three multiplies and one store per
; pass, and no shift instruction. %n is not referenced in the lines shown.
99 define void @count_up_signed(double* %d, i64 %n) nounwind {
100 ; CHECK-LABEL: count_up_signed:
101 ; CHECK: # %bb.0: # %entry
102 ; CHECK-NEXT: movq $-80, %rax
103 ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
104 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
105 ; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
106 ; CHECK-NEXT: .p2align 4, 0x90
107 ; CHECK-NEXT: .LBB2_1: # %loop
108 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
109 ; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
110 ; CHECK-NEXT: mulsd %xmm0, %xmm3
111 ; CHECK-NEXT: mulsd %xmm1, %xmm3
112 ; CHECK-NEXT: mulsd %xmm2, %xmm3
113 ; CHECK-NEXT: movsd %xmm3, 80(%rdi,%rax)
114 ; CHECK-NEXT: addq $8, %rax
115 ; CHECK-NEXT: jne .LBB2_1
116 ; CHECK-NEXT: # %bb.2: # %return
; Induction variable, counting up from 0.
122 %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
; Access 1: index is the induction variable shifted left then arithmetically
; right by 8.
123 %s0 = shl i64 %indvar, 8
124 %indvar.i8 = ashr i64 %s0, 8
125 %t0 = getelementptr double, double* %d, i64 %indvar.i8
126 %t1 = load double, double* %t0
127 %t2 = fmul double %t1, 0.1
128 store double %t2, double* %t0
; Access 2: same pattern with a shift amount of 24.
129 %s1 = shl i64 %indvar, 24
130 %indvar.i24 = ashr i64 %s1, 24
131 %t3 = getelementptr double, double* %d, i64 %indvar.i24
132 %t4 = load double, double* %t3
133 %t5 = fmul double %t4, 2.3
134 store double %t5, double* %t3
; Access 3: index is the unmodified induction variable.
135 %t6 = getelementptr double, double* %d, i64 %indvar
136 %t7 = load double, double* %t6
137 %t8 = fmul double %t7, 4.5
138 store double %t8, double* %t6
; Step by +1 and exit when the next value is 10; values 0 through 9 are
; unchanged by either shift pair above.
139 %indvar.next = add i64 %indvar, 1
140 %exitcond = icmp eq i64 %indvar.next, 10
141 br i1 %exitcond, label %return, label %loop
; count_down_signed: downward count from 10 with exit at 0, where the first two
; indices are formed with a shift-left / arithmetic-shift-right pair (an
; in-register sign extension) instead of a mask.
; Note that on i64 a shift pair by 8 sign-extends from the low 56 bits and a
; pair by 24 from the low 40 bits, despite the .i8 / .i24 value names.
; The expected assembly below has one load, three multiplies and one store per
; pass, and no shift instruction. %n is not referenced in the lines shown.
147 define void @count_down_signed(double* %d, i64 %n) nounwind {
148 ; CHECK-LABEL: count_down_signed:
149 ; CHECK: # %bb.0: # %entry
150 ; CHECK-NEXT: movl $80, %eax
151 ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
152 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
153 ; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
154 ; CHECK-NEXT: .p2align 4, 0x90
155 ; CHECK-NEXT: .LBB3_1: # %loop
156 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
157 ; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
158 ; CHECK-NEXT: mulsd %xmm0, %xmm3
159 ; CHECK-NEXT: mulsd %xmm1, %xmm3
160 ; CHECK-NEXT: mulsd %xmm2, %xmm3
161 ; CHECK-NEXT: movsd %xmm3, (%rdi,%rax)
162 ; CHECK-NEXT: addq $-8, %rax
163 ; CHECK-NEXT: jne .LBB3_1
164 ; CHECK-NEXT: # %bb.2: # %return
; Induction variable, counting down from 10.
170 %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
; Access 1: index is the induction variable shifted left then arithmetically
; right by 8.
171 %s0 = shl i64 %indvar, 8
172 %indvar.i8 = ashr i64 %s0, 8
173 %t0 = getelementptr double, double* %d, i64 %indvar.i8
174 %t1 = load double, double* %t0
175 %t2 = fmul double %t1, 0.1
176 store double %t2, double* %t0
; Access 2: same pattern with a shift amount of 24.
177 %s1 = shl i64 %indvar, 24
178 %indvar.i24 = ashr i64 %s1, 24
179 %t3 = getelementptr double, double* %d, i64 %indvar.i24
180 %t4 = load double, double* %t3
181 %t5 = fmul double %t4, 2.3
182 store double %t5, double* %t3
; Access 3: index is the unmodified induction variable.
183 %t6 = getelementptr double, double* %d, i64 %indvar
184 %t7 = load double, double* %t6
185 %t8 = fmul double %t7, 4.5
186 store double %t8, double* %t6
; Step by -1 and exit when the next value is 0; values 10 down to 1 are
; unchanged by either shift pair above.
187 %indvar.next = sub i64 %indvar, 1
188 %exitcond = icmp eq i64 %indvar.next, 0
189 br i1 %exitcond, label %return, label %loop
; another_count_up: the induction variable starts at 18446744073709551615
; (all bits set) and steps by +1; the loop is left once the incremented value
; equals 0, which is already the case for the first incremented value.
; With an all-ones start the masks are not no-ops: the three indices differ
; (255, 16777215, and the unmasked value). The expected assembly below keeps
; three separate load / multiply / store groups, with the masked indices
; folded into the constant displacements 2048 and 134217728 off a shared
; register; no masking instruction is expected.
; %n is not referenced in the lines shown.
195 define void @another_count_up(double* %d, i64 %n) nounwind {
196 ; CHECK-LABEL: another_count_up:
197 ; CHECK: # %bb.0: # %entry
198 ; CHECK-NEXT: movq $-8, %rax
199 ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
200 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
201 ; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
202 ; CHECK-NEXT: .p2align 4, 0x90
203 ; CHECK-NEXT: .LBB4_1: # %loop
204 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
205 ; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
206 ; CHECK-NEXT: mulsd %xmm0, %xmm3
207 ; CHECK-NEXT: movsd %xmm3, 2048(%rdi,%rax)
208 ; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
209 ; CHECK-NEXT: mulsd %xmm1, %xmm3
210 ; CHECK-NEXT: movsd %xmm3, 134217728(%rdi,%rax)
211 ; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
212 ; CHECK-NEXT: mulsd %xmm2, %xmm3
213 ; CHECK-NEXT: movsd %xmm3, (%rdi,%rax)
214 ; CHECK-NEXT: addq $8, %rax
215 ; CHECK-NEXT: jne .LBB4_1
216 ; CHECK-NEXT: # %bb.2: # %return
; Induction variable, starting with all 64 bits set.
222 %indvar = phi i64 [ 18446744073709551615, %entry ], [ %indvar.next, %loop ]
; Access 1: index keeps only the low 8 bits of the induction variable.
223 %indvar.i8 = and i64 %indvar, 255
224 %t0 = getelementptr double, double* %d, i64 %indvar.i8
225 %t1 = load double, double* %t0
226 %t2 = fmul double %t1, 0.1
227 store double %t2, double* %t0
; Access 2: index keeps only the low 24 bits (16777215 = 2^24 - 1).
228 %indvar.i24 = and i64 %indvar, 16777215
229 %t3 = getelementptr double, double* %d, i64 %indvar.i24
230 %t4 = load double, double* %t3
231 %t5 = fmul double %t4, 2.3
232 store double %t5, double* %t3
; Access 3: index is the unmodified induction variable.
233 %t6 = getelementptr double, double* %d, i64 %indvar
234 %t7 = load double, double* %t6
235 %t8 = fmul double %t7, 4.5
236 store double %t8, double* %t6
; Step by +1 (wrapping from all-ones to 0) and exit when the next value is 0.
237 %indvar.next = add i64 %indvar, 1
238 %exitcond = icmp eq i64 %indvar.next, 0
239 br i1 %exitcond, label %return, label %loop
; another_count_down: the induction variable starts at 0 and steps by -1; the
; loop is left once the decremented value equals 18446744073709551615 (all
; bits set), which is already the case for the first decremented value.
; The three indices are the induction variable masked with 255, masked with
; 16777215, and unmasked; the middle operation is a division by 2.3 rather
; than a multiplication. The expected assembly below keeps three separate
; load / operate / store groups and advances three registers per pass by
; 2040, 134217720 and -8 bytes; no masking instruction is expected.
; %n is not referenced in the lines shown.
245 define void @another_count_down(double* %d, i64 %n) nounwind {
246 ; CHECK-LABEL: another_count_down:
247 ; CHECK: # %bb.0: # %entry
248 ; CHECK-NEXT: movq $-2040, %rax # imm = 0xF808
249 ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
250 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
251 ; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
252 ; CHECK-NEXT: movq %rdi, %rcx
253 ; CHECK-NEXT: movq %rdi, %rdx
254 ; CHECK-NEXT: .p2align 4, 0x90
255 ; CHECK-NEXT: .LBB5_1: # %loop
256 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
257 ; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
258 ; CHECK-NEXT: mulsd %xmm0, %xmm3
259 ; CHECK-NEXT: movsd %xmm3, 2040(%rdi,%rax)
260 ; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
261 ; CHECK-NEXT: divsd %xmm1, %xmm3
262 ; CHECK-NEXT: movsd %xmm3, (%rcx)
263 ; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
264 ; CHECK-NEXT: mulsd %xmm2, %xmm3
265 ; CHECK-NEXT: movsd %xmm3, (%rdx)
266 ; CHECK-NEXT: addq $-8, %rdx
267 ; CHECK-NEXT: addq $134217720, %rcx # imm = 0x7FFFFF8
268 ; CHECK-NEXT: addq $2040, %rax # imm = 0x7F8
269 ; CHECK-NEXT: jne .LBB5_1
270 ; CHECK-NEXT: # %bb.2: # %return
; Induction variable, counting down from 0.
276 %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
; Access 1: index keeps only the low 8 bits of the induction variable.
277 %indvar.i8 = and i64 %indvar, 255
278 %t0 = getelementptr double, double* %d, i64 %indvar.i8
279 %t1 = load double, double* %t0
280 %t2 = fmul double %t1, 0.1
281 store double %t2, double* %t0
; Access 2: index keeps only the low 24 bits (16777215 = 2^24 - 1); this one
; divides instead of multiplying.
282 %indvar.i24 = and i64 %indvar, 16777215
283 %t3 = getelementptr double, double* %d, i64 %indvar.i24
284 %t4 = load double, double* %t3
285 %t5 = fdiv double %t4, 2.3
286 store double %t5, double* %t3
; Access 3: index is the unmodified induction variable.
287 %t6 = getelementptr double, double* %d, i64 %indvar
288 %t7 = load double, double* %t6
289 %t8 = fmul double %t7, 4.5
290 store double %t8, double* %t6
; Step by -1 (wrapping from 0 to all-ones) and exit when the next value is
; all-ones.
291 %indvar.next = sub i64 %indvar, 1
292 %exitcond = icmp eq i64 %indvar.next, 18446744073709551615
293 br i1 %exitcond, label %return, label %loop
; another_count_up_signed: the induction variable starts at
; 18446744073709551615 (all bits set, i.e. -1 as a signed i64) and steps by +1;
; the loop is left once the incremented value equals 0, which is already the
; case for the first incremented value.
; The first two indices are formed with a shift-left / arithmetic-shift-right
; pair (by 8 and by 24) instead of a mask; the middle operation is a division
; by 2.3. The expected assembly below has one load, a multiply, a divide, a
; multiply and one store per pass, and no shift instruction.
; %n is not referenced in the lines shown.
299 define void @another_count_up_signed(double* %d, i64 %n) nounwind {
300 ; CHECK-LABEL: another_count_up_signed:
301 ; CHECK: # %bb.0: # %entry
302 ; CHECK-NEXT: movq $-8, %rax
303 ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
304 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
305 ; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
306 ; CHECK-NEXT: .p2align 4, 0x90
307 ; CHECK-NEXT: .LBB6_1: # %loop
308 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
309 ; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
310 ; CHECK-NEXT: mulsd %xmm0, %xmm3
311 ; CHECK-NEXT: divsd %xmm1, %xmm3
312 ; CHECK-NEXT: mulsd %xmm2, %xmm3
313 ; CHECK-NEXT: movsd %xmm3, (%rdi,%rax)
314 ; CHECK-NEXT: addq $8, %rax
315 ; CHECK-NEXT: jne .LBB6_1
316 ; CHECK-NEXT: # %bb.2: # %return
; Induction variable, starting with all 64 bits set.
322 %indvar = phi i64 [ 18446744073709551615, %entry ], [ %indvar.next, %loop ]
; Access 1: index is the induction variable shifted left then arithmetically
; right by 8; an all-ones value is unchanged by this.
323 %s0 = shl i64 %indvar, 8
324 %indvar.i8 = ashr i64 %s0, 8
325 %t0 = getelementptr double, double* %d, i64 %indvar.i8
326 %t1 = load double, double* %t0
327 %t2 = fmul double %t1, 0.1
328 store double %t2, double* %t0
; Access 2: same pattern with a shift amount of 24; this one divides instead
; of multiplying.
329 %s1 = shl i64 %indvar, 24
330 %indvar.i24 = ashr i64 %s1, 24
331 %t3 = getelementptr double, double* %d, i64 %indvar.i24
332 %t4 = load double, double* %t3
333 %t5 = fdiv double %t4, 2.3
334 store double %t5, double* %t3
; Access 3: index is the unmodified induction variable.
335 %t6 = getelementptr double, double* %d, i64 %indvar
336 %t7 = load double, double* %t6
337 %t8 = fmul double %t7, 4.5
338 store double %t8, double* %t6
; Step by +1 (wrapping from all-ones to 0) and exit when the next value is 0.
339 %indvar.next = add i64 %indvar, 1
340 %exitcond = icmp eq i64 %indvar.next, 0
341 br i1 %exitcond, label %return, label %loop
; another_count_down_signed: the induction variable starts at 0 and steps by
; -1; the loop is left once the decremented value equals 18446744073709551615
; (all bits set), which is already the case for the first decremented value.
; The first two indices are formed with a shift-left / arithmetic-shift-right
; pair (by 8 and by 24) instead of a mask; the middle operation is a division
; by 2.3. The expected assembly below has one load, a multiply, a divide, a
; multiply and one store per pass, and no shift instruction.
; %n is not referenced in the lines shown.
347 define void @another_count_down_signed(double* %d, i64 %n) nounwind {
348 ; CHECK-LABEL: another_count_down_signed:
349 ; CHECK: # %bb.0: # %entry
350 ; CHECK-NEXT: movl $8, %eax
351 ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
352 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
353 ; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
354 ; CHECK-NEXT: .p2align 4, 0x90
355 ; CHECK-NEXT: .LBB7_1: # %loop
356 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
357 ; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
358 ; CHECK-NEXT: mulsd %xmm0, %xmm3
359 ; CHECK-NEXT: divsd %xmm1, %xmm3
360 ; CHECK-NEXT: mulsd %xmm2, %xmm3
361 ; CHECK-NEXT: movsd %xmm3, -8(%rdi,%rax)
362 ; CHECK-NEXT: addq $-8, %rax
363 ; CHECK-NEXT: jne .LBB7_1
364 ; CHECK-NEXT: # %bb.2: # %return
; Induction variable, counting down from 0.
370 %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
; Access 1: index is the induction variable shifted left then arithmetically
; right by 8; a zero value is unchanged by this.
371 %s0 = shl i64 %indvar, 8
372 %indvar.i8 = ashr i64 %s0, 8
373 %t0 = getelementptr double, double* %d, i64 %indvar.i8
374 %t1 = load double, double* %t0
375 %t2 = fmul double %t1, 0.1
376 store double %t2, double* %t0
; Access 2: same pattern with a shift amount of 24; this one divides instead
; of multiplying.
377 %s1 = shl i64 %indvar, 24
378 %indvar.i24 = ashr i64 %s1, 24
379 %t3 = getelementptr double, double* %d, i64 %indvar.i24
380 %t4 = load double, double* %t3
381 %t5 = fdiv double %t4, 2.3
382 store double %t5, double* %t3
; Access 3: index is the unmodified induction variable.
383 %t6 = getelementptr double, double* %d, i64 %indvar
384 %t7 = load double, double* %t6
385 %t8 = fmul double %t7, 4.5
386 store double %t8, double* %t6
; Step by -1 (wrapping from 0 to all-ones) and exit when the next value is
; all-ones.
387 %indvar.next = sub i64 %indvar, 1
388 %exitcond = icmp eq i64 %indvar.next, 18446744073709551615
389 br i1 %exitcond, label %return, label %loop