1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s
3 ; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -x86-cmov-converter-force-all=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=CHECK-FORCEALL
5 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6 ;; This test checks that the x86-cmov-converter optimization transforms CMOV
7 ;; instructions into branches when it is profitable.
8 ;; There are 5 cases below:
9 ;; 1. CmovInCriticalPath (@CmovInHotPath):
10 ;; CMOV depends on the condition and it is in the hot path.
11 ;; Thus, it is worth transforming.
13 ;; 2. CmovNotInCriticalPath (@CmovNotInHotPath):
14 ;; Similar to (1), except that CMOV is not in the hot path.
15 ;; Thus, it is not worth transforming.
18 ;; Maximum calculation algorithm that looks for the max index;
19 ;; calculating the CMOV value is cheaper than calculating the CMOV condition.
20 ;; Thus, it is worth transforming.
23 ;; Maximum calculation algorithm that looks for the max value;
24 ;; calculating the CMOV value is not cheaper than calculating the CMOV condition.
25 ;; Thus, it is not worth transforming.
28 ;; Usually, the CMOV condition in a binary search is not predictable.
29 ;; Thus, it is not worth transforming.
31 ;; Test was created using the following command line:
32 ;; > clang -S -O2 -m64 -fno-vectorize -fno-unroll-loops -emit-llvm foo.c -o -
34 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
35 ;;void CmovInHotPath(int n, int a, int b, int *c, int *d) {
36 ;; for (int i = 0; i < n; i++) {
40 ;; c[i] = (c[i] + 1) * t;
45 ;;void CmovNotInHotPath(int n, int a, int b, int *c, int *d) {
46 ;; for (int i = 0; i < n; i++) {
56 ;;int MaxIndex(int n, int *a) {
58 ;; for (int i = 1; i < n; i++) {
66 ;;int MaxValue(int n, int *a) {
68 ;; for (int i = 1; i < n; i++) {
75 ;;typedef struct Node Node;
82 ;;unsigned BinarySearch(unsigned Mask, Node *Curr, Node *Next) {
83 ;; while (Curr->Val > Next->Val) {
85 ;; if (Mask & (0x1 << Curr->Val))
86 ;; Next = Curr->Right;
94 ;;void SmallGainPerLoop(int n, int a, int b, int *c, int *d) {
95 ;; for (int i = 0; i < n; i++) {
102 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
104 %struct.Node = type { i32, ptr, ptr }
106 define void @CmovInHotPath(i32 %n, i32 %a, i32 %b, ptr nocapture %c, ptr nocapture readnone %d) #0 {
107 ; CHECK-LABEL: CmovInHotPath:
108 ; CHECK: # %bb.0: # %entry
109 ; CHECK-NEXT: testl %edi, %edi
110 ; CHECK-NEXT: jle .LBB0_5
111 ; CHECK-NEXT: # %bb.1: # %for.body.preheader
112 ; CHECK-NEXT: movl %edi, %eax
113 ; CHECK-NEXT: xorl %edi, %edi
114 ; CHECK-NEXT: .LBB0_2: # %for.body
115 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
116 ; CHECK-NEXT: movl (%rcx,%rdi,4), %r10d
117 ; CHECK-NEXT: leal 1(%r10), %r8d
118 ; CHECK-NEXT: imull %esi, %r10d
119 ; CHECK-NEXT: movl $10, %r9d
120 ; CHECK-NEXT: cmpl %edx, %r10d
121 ; CHECK-NEXT: jg .LBB0_4
122 ; CHECK-NEXT: # %bb.3: # %for.body
123 ; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
124 ; CHECK-NEXT: movl %r8d, %r9d
125 ; CHECK-NEXT: .LBB0_4: # %for.body
126 ; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
127 ; CHECK-NEXT: imull %r8d, %r9d
128 ; CHECK-NEXT: movl %r9d, (%rcx,%rdi,4)
129 ; CHECK-NEXT: addq $1, %rdi
130 ; CHECK-NEXT: cmpq %rdi, %rax
131 ; CHECK-NEXT: jne .LBB0_2
132 ; CHECK-NEXT: .LBB0_5: # %for.cond.cleanup
135 ; CHECK-FORCEALL-LABEL: CmovInHotPath:
136 ; CHECK-FORCEALL: # %bb.0: # %entry
137 ; CHECK-FORCEALL-NEXT: testl %edi, %edi
138 ; CHECK-FORCEALL-NEXT: jle .LBB0_5
139 ; CHECK-FORCEALL-NEXT: # %bb.1: # %for.body.preheader
140 ; CHECK-FORCEALL-NEXT: movl %edi, %eax
141 ; CHECK-FORCEALL-NEXT: xorl %edi, %edi
142 ; CHECK-FORCEALL-NEXT: .LBB0_2: # %for.body
143 ; CHECK-FORCEALL-NEXT: # =>This Inner Loop Header: Depth=1
144 ; CHECK-FORCEALL-NEXT: movl (%rcx,%rdi,4), %r10d
145 ; CHECK-FORCEALL-NEXT: leal 1(%r10), %r8d
146 ; CHECK-FORCEALL-NEXT: imull %esi, %r10d
147 ; CHECK-FORCEALL-NEXT: movl $10, %r9d
148 ; CHECK-FORCEALL-NEXT: cmpl %edx, %r10d
149 ; CHECK-FORCEALL-NEXT: jg .LBB0_4
150 ; CHECK-FORCEALL-NEXT: # %bb.3: # %for.body
151 ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB0_2 Depth=1
152 ; CHECK-FORCEALL-NEXT: movl %r8d, %r9d
153 ; CHECK-FORCEALL-NEXT: .LBB0_4: # %for.body
154 ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB0_2 Depth=1
155 ; CHECK-FORCEALL-NEXT: imull %r8d, %r9d
156 ; CHECK-FORCEALL-NEXT: movl %r9d, (%rcx,%rdi,4)
157 ; CHECK-FORCEALL-NEXT: addq $1, %rdi
158 ; CHECK-FORCEALL-NEXT: cmpq %rdi, %rax
159 ; CHECK-FORCEALL-NEXT: jne .LBB0_2
160 ; CHECK-FORCEALL-NEXT: .LBB0_5: # %for.cond.cleanup
161 ; CHECK-FORCEALL-NEXT: retq
163 %cmp14 = icmp sgt i32 %n, 0
164 br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
166 for.body.preheader: ; preds = %entry
167 %wide.trip.count = zext i32 %n to i64
170 for.cond.cleanup: ; preds = %for.body, %entry
173 for.body: ; preds = %for.body.preheader, %for.body
174 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
175 %arrayidx = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
176 %0 = load i32, ptr %arrayidx, align 4
177 %add = add nsw i32 %0, 1
178 %mul = mul nsw i32 %0, %a
179 %cmp3 = icmp sgt i32 %mul, %b
180 %. = select i1 %cmp3, i32 10, i32 %add
181 %mul7 = mul nsw i32 %., %add
182 store i32 %mul7, ptr %arrayidx, align 4
183 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
184 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
185 br i1 %exitcond, label %for.cond.cleanup, label %for.body
188 define void @CmovNotInHotPath(i32 %n, i32 %a, i32 %b, ptr nocapture %c, ptr nocapture %d) #0 {
189 ; CHECK-LABEL: CmovNotInHotPath:
190 ; CHECK: # %bb.0: # %entry
191 ; CHECK-NEXT: testl %edi, %edi
192 ; CHECK-NEXT: jle .LBB1_3
193 ; CHECK-NEXT: # %bb.1: # %for.body.preheader
194 ; CHECK-NEXT: movl %edx, %r9d
195 ; CHECK-NEXT: movl %edi, %edi
196 ; CHECK-NEXT: xorl %r10d, %r10d
197 ; CHECK-NEXT: movl $10, %r11d
198 ; CHECK-NEXT: .LBB1_2: # %for.body
199 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
200 ; CHECK-NEXT: movl (%rcx,%r10,4), %eax
201 ; CHECK-NEXT: movl %eax, %edx
202 ; CHECK-NEXT: imull %esi, %edx
203 ; CHECK-NEXT: cmpl %r9d, %edx
204 ; CHECK-NEXT: cmovgl %r11d, %eax
205 ; CHECK-NEXT: movl %eax, (%rcx,%r10,4)
206 ; CHECK-NEXT: movl (%r8,%r10,4), %eax
208 ; CHECK-NEXT: idivl %r9d
209 ; CHECK-NEXT: movl %eax, (%r8,%r10,4)
210 ; CHECK-NEXT: addq $1, %r10
211 ; CHECK-NEXT: cmpq %r10, %rdi
212 ; CHECK-NEXT: jne .LBB1_2
213 ; CHECK-NEXT: .LBB1_3: # %for.cond.cleanup
216 ; CHECK-FORCEALL-LABEL: CmovNotInHotPath:
217 ; CHECK-FORCEALL: # %bb.0: # %entry
218 ; CHECK-FORCEALL-NEXT: testl %edi, %edi
219 ; CHECK-FORCEALL-NEXT: jle .LBB1_5
220 ; CHECK-FORCEALL-NEXT: # %bb.1: # %for.body.preheader
221 ; CHECK-FORCEALL-NEXT: movl %edx, %r9d
222 ; CHECK-FORCEALL-NEXT: movl %edi, %edi
223 ; CHECK-FORCEALL-NEXT: xorl %r10d, %r10d
224 ; CHECK-FORCEALL-NEXT: .LBB1_2: # %for.body
225 ; CHECK-FORCEALL-NEXT: # =>This Inner Loop Header: Depth=1
226 ; CHECK-FORCEALL-NEXT: movl (%rcx,%r10,4), %eax
227 ; CHECK-FORCEALL-NEXT: movl %eax, %r11d
228 ; CHECK-FORCEALL-NEXT: imull %esi, %r11d
229 ; CHECK-FORCEALL-NEXT: movl $10, %edx
230 ; CHECK-FORCEALL-NEXT: cmpl %r9d, %r11d
231 ; CHECK-FORCEALL-NEXT: jg .LBB1_4
232 ; CHECK-FORCEALL-NEXT: # %bb.3: # %for.body
233 ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB1_2 Depth=1
234 ; CHECK-FORCEALL-NEXT: movl %eax, %edx
235 ; CHECK-FORCEALL-NEXT: .LBB1_4: # %for.body
236 ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB1_2 Depth=1
237 ; CHECK-FORCEALL-NEXT: movl %edx, (%rcx,%r10,4)
238 ; CHECK-FORCEALL-NEXT: movl (%r8,%r10,4), %eax
239 ; CHECK-FORCEALL-NEXT: cltd
240 ; CHECK-FORCEALL-NEXT: idivl %r9d
241 ; CHECK-FORCEALL-NEXT: movl %eax, (%r8,%r10,4)
242 ; CHECK-FORCEALL-NEXT: addq $1, %r10
243 ; CHECK-FORCEALL-NEXT: cmpq %r10, %rdi
244 ; CHECK-FORCEALL-NEXT: jne .LBB1_2
245 ; CHECK-FORCEALL-NEXT: .LBB1_5: # %for.cond.cleanup
246 ; CHECK-FORCEALL-NEXT: retq
248 %cmp18 = icmp sgt i32 %n, 0
249 br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup
251 for.body.preheader: ; preds = %entry
252 %wide.trip.count = zext i32 %n to i64
255 for.cond.cleanup: ; preds = %for.body, %entry
258 for.body: ; preds = %for.body.preheader, %for.body
259 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
260 %arrayidx = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
261 %0 = load i32, ptr %arrayidx, align 4
262 %mul = mul nsw i32 %0, %a
263 %cmp3 = icmp sgt i32 %mul, %b
264 %. = select i1 %cmp3, i32 10, i32 %0
265 store i32 %., ptr %arrayidx, align 4
266 %arrayidx7 = getelementptr inbounds i32, ptr %d, i64 %indvars.iv
267 %1 = load i32, ptr %arrayidx7, align 4
268 %div = sdiv i32 %1, %b
269 store i32 %div, ptr %arrayidx7, align 4
270 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
271 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
272 br i1 %exitcond, label %for.cond.cleanup, label %for.body
275 define i32 @MaxIndex(i32 %n, ptr nocapture readonly %a) #0 {
276 ; CHECK-LABEL: MaxIndex:
277 ; CHECK: # %bb.0: # %entry
278 ; CHECK-NEXT: xorl %eax, %eax
279 ; CHECK-NEXT: cmpl $2, %edi
280 ; CHECK-NEXT: jl .LBB2_5
281 ; CHECK-NEXT: # %bb.1: # %for.body.preheader
282 ; CHECK-NEXT: movl %edi, %ecx
283 ; CHECK-NEXT: xorl %edi, %edi
284 ; CHECK-NEXT: movl $1, %edx
285 ; CHECK-NEXT: .LBB2_2: # %for.body
286 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
287 ; CHECK-NEXT: movl (%rsi,%rdx,4), %r8d
288 ; CHECK-NEXT: movslq %edi, %r9
289 ; CHECK-NEXT: movl %edx, %eax
290 ; CHECK-NEXT: cmpl (%rsi,%r9,4), %r8d
291 ; CHECK-NEXT: jg .LBB2_4
292 ; CHECK-NEXT: # %bb.3: # %for.body
293 ; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
294 ; CHECK-NEXT: movl %edi, %eax
295 ; CHECK-NEXT: .LBB2_4: # %for.body
296 ; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
297 ; CHECK-NEXT: addq $1, %rdx
298 ; CHECK-NEXT: movl %eax, %edi
299 ; CHECK-NEXT: cmpq %rdx, %rcx
300 ; CHECK-NEXT: jne .LBB2_2
301 ; CHECK-NEXT: .LBB2_5: # %for.cond.cleanup
304 ; CHECK-FORCEALL-LABEL: MaxIndex:
305 ; CHECK-FORCEALL: # %bb.0: # %entry
306 ; CHECK-FORCEALL-NEXT: xorl %eax, %eax
307 ; CHECK-FORCEALL-NEXT: cmpl $2, %edi
308 ; CHECK-FORCEALL-NEXT: jl .LBB2_5
309 ; CHECK-FORCEALL-NEXT: # %bb.1: # %for.body.preheader
310 ; CHECK-FORCEALL-NEXT: movl %edi, %ecx
311 ; CHECK-FORCEALL-NEXT: xorl %edi, %edi
312 ; CHECK-FORCEALL-NEXT: movl $1, %edx
313 ; CHECK-FORCEALL-NEXT: .LBB2_2: # %for.body
314 ; CHECK-FORCEALL-NEXT: # =>This Inner Loop Header: Depth=1
315 ; CHECK-FORCEALL-NEXT: movl (%rsi,%rdx,4), %r8d
316 ; CHECK-FORCEALL-NEXT: movslq %edi, %r9
317 ; CHECK-FORCEALL-NEXT: movl %edx, %eax
318 ; CHECK-FORCEALL-NEXT: cmpl (%rsi,%r9,4), %r8d
319 ; CHECK-FORCEALL-NEXT: jg .LBB2_4
320 ; CHECK-FORCEALL-NEXT: # %bb.3: # %for.body
321 ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB2_2 Depth=1
322 ; CHECK-FORCEALL-NEXT: movl %edi, %eax
323 ; CHECK-FORCEALL-NEXT: .LBB2_4: # %for.body
324 ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB2_2 Depth=1
325 ; CHECK-FORCEALL-NEXT: addq $1, %rdx
326 ; CHECK-FORCEALL-NEXT: movl %eax, %edi
327 ; CHECK-FORCEALL-NEXT: cmpq %rdx, %rcx
328 ; CHECK-FORCEALL-NEXT: jne .LBB2_2
329 ; CHECK-FORCEALL-NEXT: .LBB2_5: # %for.cond.cleanup
330 ; CHECK-FORCEALL-NEXT: retq
332 %cmp14 = icmp sgt i32 %n, 1
333 br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
335 for.body.preheader: ; preds = %entry
336 %wide.trip.count = zext i32 %n to i64
339 for.cond.cleanup: ; preds = %for.body, %entry
340 %t.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.t.0, %for.body ]
343 for.body: ; preds = %for.body.preheader, %for.body
344 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
345 %t.015 = phi i32 [ %i.0.t.0, %for.body ], [ 0, %for.body.preheader ]
346 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
347 %0 = load i32, ptr %arrayidx, align 4
348 %idxprom1 = sext i32 %t.015 to i64
349 %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %idxprom1
350 %1 = load i32, ptr %arrayidx2, align 4
351 %cmp3 = icmp sgt i32 %0, %1
352 %2 = trunc i64 %indvars.iv to i32
353 %i.0.t.0 = select i1 %cmp3, i32 %2, i32 %t.015
354 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
355 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
356 br i1 %exitcond, label %for.cond.cleanup, label %for.body
359 ; If a cmov instruction is marked as unpredictable, do not convert it to a branch.
360 define i32 @MaxIndex_unpredictable(i32 %n, ptr nocapture readonly %a) #0 {
361 ; CHECK-LABEL: MaxIndex_unpredictable:
362 ; CHECK: # %bb.0: # %entry
363 ; CHECK-NEXT: xorl %eax, %eax
364 ; CHECK-NEXT: cmpl $2, %edi
365 ; CHECK-NEXT: jl .LBB3_3
366 ; CHECK-NEXT: # %bb.1: # %for.body.preheader
367 ; CHECK-NEXT: movl %edi, %ecx
368 ; CHECK-NEXT: xorl %eax, %eax
369 ; CHECK-NEXT: movl $1, %edx
370 ; CHECK-NEXT: .LBB3_2: # %for.body
371 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
372 ; CHECK-NEXT: movl (%rsi,%rdx,4), %edi
374 ; CHECK-NEXT: cmpl (%rsi,%rax,4), %edi
375 ; CHECK-NEXT: cmovgl %edx, %eax
376 ; CHECK-NEXT: addq $1, %rdx
377 ; CHECK-NEXT: cmpq %rdx, %rcx
378 ; CHECK-NEXT: jne .LBB3_2
379 ; CHECK-NEXT: .LBB3_3: # %for.cond.cleanup
380 ; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
383 ; CHECK-FORCEALL-LABEL: MaxIndex_unpredictable:
384 ; CHECK-FORCEALL: # %bb.0: # %entry
385 ; CHECK-FORCEALL-NEXT: xorl %eax, %eax
386 ; CHECK-FORCEALL-NEXT: cmpl $2, %edi
387 ; CHECK-FORCEALL-NEXT: jl .LBB3_3
388 ; CHECK-FORCEALL-NEXT: # %bb.1: # %for.body.preheader
389 ; CHECK-FORCEALL-NEXT: movl %edi, %ecx
390 ; CHECK-FORCEALL-NEXT: xorl %eax, %eax
391 ; CHECK-FORCEALL-NEXT: movl $1, %edx
392 ; CHECK-FORCEALL-NEXT: .LBB3_2: # %for.body
393 ; CHECK-FORCEALL-NEXT: # =>This Inner Loop Header: Depth=1
394 ; CHECK-FORCEALL-NEXT: movl (%rsi,%rdx,4), %edi
395 ; CHECK-FORCEALL-NEXT: cltq
396 ; CHECK-FORCEALL-NEXT: cmpl (%rsi,%rax,4), %edi
397 ; CHECK-FORCEALL-NEXT: cmovgl %edx, %eax
398 ; CHECK-FORCEALL-NEXT: addq $1, %rdx
399 ; CHECK-FORCEALL-NEXT: cmpq %rdx, %rcx
400 ; CHECK-FORCEALL-NEXT: jne .LBB3_2
401 ; CHECK-FORCEALL-NEXT: .LBB3_3: # %for.cond.cleanup
402 ; CHECK-FORCEALL-NEXT: # kill: def $eax killed $eax killed $rax
403 ; CHECK-FORCEALL-NEXT: retq
405 %cmp14 = icmp sgt i32 %n, 1
406 br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
408 for.body.preheader: ; preds = %entry
409 %wide.trip.count = zext i32 %n to i64
412 for.cond.cleanup: ; preds = %for.body, %entry
413 %t.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.t.0, %for.body ]
416 for.body: ; preds = %for.body.preheader, %for.body
417 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
418 %t.015 = phi i32 [ %i.0.t.0, %for.body ], [ 0, %for.body.preheader ]
419 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
420 %0 = load i32, ptr %arrayidx, align 4
421 %idxprom1 = sext i32 %t.015 to i64
422 %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %idxprom1
423 %1 = load i32, ptr %arrayidx2, align 4
424 %cmp3 = icmp sgt i32 %0, %1
425 %2 = trunc i64 %indvars.iv to i32
426 %i.0.t.0 = select i1 %cmp3, i32 %2, i32 %t.015, !unpredictable !0
427 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
428 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
429 br i1 %exitcond, label %for.cond.cleanup, label %for.body
432 define i32 @MaxValue(i32 %n, ptr nocapture readonly %a) #0 {
433 ; CHECK-LABEL: MaxValue:
434 ; CHECK: # %bb.0: # %entry
435 ; CHECK-NEXT: movl (%rsi), %eax
436 ; CHECK-NEXT: cmpl $2, %edi
437 ; CHECK-NEXT: jl .LBB4_3
438 ; CHECK-NEXT: # %bb.1: # %for.body.preheader
439 ; CHECK-NEXT: movl %edi, %ecx
440 ; CHECK-NEXT: movl $1, %edx
441 ; CHECK-NEXT: .LBB4_2: # %for.body
442 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
443 ; CHECK-NEXT: movl (%rsi,%rdx,4), %edi
444 ; CHECK-NEXT: cmpl %eax, %edi
445 ; CHECK-NEXT: cmovgl %edi, %eax
446 ; CHECK-NEXT: addq $1, %rdx
447 ; CHECK-NEXT: cmpq %rdx, %rcx
448 ; CHECK-NEXT: jne .LBB4_2
449 ; CHECK-NEXT: .LBB4_3: # %for.cond.cleanup
452 ; CHECK-FORCEALL-LABEL: MaxValue:
453 ; CHECK-FORCEALL: # %bb.0: # %entry
454 ; CHECK-FORCEALL-NEXT: movl (%rsi), %r8d
455 ; CHECK-FORCEALL-NEXT: cmpl $2, %edi
456 ; CHECK-FORCEALL-NEXT: jge .LBB4_3
457 ; CHECK-FORCEALL-NEXT: # %bb.1:
458 ; CHECK-FORCEALL-NEXT: movl %r8d, %eax
459 ; CHECK-FORCEALL-NEXT: .LBB4_2: # %for.cond.cleanup
460 ; CHECK-FORCEALL-NEXT: retq
461 ; CHECK-FORCEALL-NEXT: .LBB4_3: # %for.body.preheader
462 ; CHECK-FORCEALL-NEXT: movl %edi, %ecx
463 ; CHECK-FORCEALL-NEXT: movl $1, %edx
464 ; CHECK-FORCEALL-NEXT: .LBB4_4: # %for.body
465 ; CHECK-FORCEALL-NEXT: # =>This Inner Loop Header: Depth=1
466 ; CHECK-FORCEALL-NEXT: movl (%rsi,%rdx,4), %eax
467 ; CHECK-FORCEALL-NEXT: cmpl %r8d, %eax
468 ; CHECK-FORCEALL-NEXT: jg .LBB4_6
469 ; CHECK-FORCEALL-NEXT: # %bb.5: # %for.body
470 ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB4_4 Depth=1
471 ; CHECK-FORCEALL-NEXT: movl %r8d, %eax
472 ; CHECK-FORCEALL-NEXT: .LBB4_6: # %for.body
473 ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB4_4 Depth=1
474 ; CHECK-FORCEALL-NEXT: addq $1, %rdx
475 ; CHECK-FORCEALL-NEXT: movl %eax, %r8d
476 ; CHECK-FORCEALL-NEXT: cmpq %rdx, %rcx
477 ; CHECK-FORCEALL-NEXT: je .LBB4_2
478 ; CHECK-FORCEALL-NEXT: jmp .LBB4_4
480 %0 = load i32, ptr %a, align 4
481 %cmp13 = icmp sgt i32 %n, 1
482 br i1 %cmp13, label %for.body.preheader, label %for.cond.cleanup
484 for.body.preheader: ; preds = %entry
485 %wide.trip.count = zext i32 %n to i64
488 for.cond.cleanup: ; preds = %for.body, %entry
489 %t.0.lcssa = phi i32 [ %0, %entry ], [ %.t.0, %for.body ]
492 for.body: ; preds = %for.body.preheader, %for.body
493 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
494 %t.014 = phi i32 [ %.t.0, %for.body ], [ %0, %for.body.preheader ]
495 %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
496 %1 = load i32, ptr %arrayidx1, align 4
497 %cmp2 = icmp sgt i32 %1, %t.014
498 %.t.0 = select i1 %cmp2, i32 %1, i32 %t.014
499 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
500 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
501 br i1 %exitcond, label %for.cond.cleanup, label %for.body
504 define i32 @BinarySearch(i32 %Mask, ptr nocapture readonly %Curr, ptr nocapture readonly %Next) #0 {
505 ; CHECK-LABEL: BinarySearch:
506 ; CHECK: # %bb.0: # %entry
507 ; CHECK-NEXT: movl (%rsi), %eax
508 ; CHECK-NEXT: jmp .LBB5_2
509 ; CHECK-NEXT: .LBB5_1: # %while.body
510 ; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1
511 ; CHECK-NEXT: movl %ecx, %eax
512 ; CHECK-NEXT: xorl %ecx, %ecx
513 ; CHECK-NEXT: btl %eax, %edi
514 ; CHECK-NEXT: setae %cl
515 ; CHECK-NEXT: movq 8(%rdx,%rcx,8), %rdx
516 ; CHECK-NEXT: .LBB5_2: # %while.body
517 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
518 ; CHECK-NEXT: movl (%rdx), %ecx
519 ; CHECK-NEXT: cmpl %ecx, %eax
520 ; CHECK-NEXT: ja .LBB5_1
521 ; CHECK-NEXT: # %bb.3: # %while.end
524 ; CHECK-FORCEALL-LABEL: BinarySearch:
525 ; CHECK-FORCEALL: # %bb.0: # %entry
526 ; CHECK-FORCEALL-NEXT: movl (%rsi), %eax
527 ; CHECK-FORCEALL-NEXT: jmp .LBB5_2
528 ; CHECK-FORCEALL-NEXT: .LBB5_1: # %while.body
529 ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB5_2 Depth=1
530 ; CHECK-FORCEALL-NEXT: movl %ecx, %eax
531 ; CHECK-FORCEALL-NEXT: xorl %ecx, %ecx
532 ; CHECK-FORCEALL-NEXT: btl %eax, %edi
533 ; CHECK-FORCEALL-NEXT: setae %cl
534 ; CHECK-FORCEALL-NEXT: movq 8(%rdx,%rcx,8), %rdx
535 ; CHECK-FORCEALL-NEXT: .LBB5_2: # %while.body
536 ; CHECK-FORCEALL-NEXT: # =>This Inner Loop Header: Depth=1
537 ; CHECK-FORCEALL-NEXT: movl (%rdx), %ecx
538 ; CHECK-FORCEALL-NEXT: cmpl %ecx, %eax
539 ; CHECK-FORCEALL-NEXT: ja .LBB5_1
540 ; CHECK-FORCEALL-NEXT: # %bb.3: # %while.end
541 ; CHECK-FORCEALL-NEXT: retq
543 %0 = load i32, ptr %Curr, align 8
544 %1 = load i32, ptr %Next, align 8
545 %cmp10 = icmp ugt i32 %0, %1
546 br i1 %cmp10, label %while.body, label %while.end
548 while.body: ; preds = %entry, %while.body
549 %2 = phi i32 [ %4, %while.body ], [ %1, %entry ]
550 %Next.addr.011 = phi ptr [ %3, %while.body ], [ %Next, %entry ]
552 %and = and i32 %shl, %Mask
553 %tobool = icmp eq i32 %and, 0
554 %Left = getelementptr inbounds %struct.Node, ptr %Next.addr.011, i64 0, i32 2
555 %Right = getelementptr inbounds %struct.Node, ptr %Next.addr.011, i64 0, i32 1
556 %Left.sink = select i1 %tobool, ptr %Left, ptr %Right
557 %3 = load ptr, ptr %Left.sink, align 8
558 %4 = load i32, ptr %3, align 8
559 %cmp = icmp ugt i32 %2, %4
560 br i1 %cmp, label %while.body, label %while.end
562 while.end: ; preds = %while.body, %entry
563 %.lcssa = phi i32 [ %0, %entry ], [ %2, %while.body ]
567 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
568 ;; The following test checks that the x86-cmov-converter optimization transforms
569 ;; CMOV instructions into branches correctly.
573 ;; v1 = CMOVgt t1, f1, cond
574 ;; v2 = CMOVle s1, f2, cond
576 ;; Where: t1 = 11, f1 = 22, f2 = a
578 ;; After CMOV transformation
579 ;; -------------------------
588 ;; %v1 = phi[%f1, %FalseMBB], [%t1, %MBB]
589 ;; %v2 = phi[%f1, %FalseMBB], [%f2, %MBB] ; For a CMOV with OppCC, swap the
590 ;; ; true-value with the false-value.
591 ;; ; A Phi instruction cannot use the
592 ;; ; result of a previous Phi instruction.
593 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
595 define void @Transform(ptr%arr, ptr%arr2, i32 %a, i32 %b, i32 %c, i32 %n) #0 {
596 ; CHECK-LABEL: Transform:
597 ; CHECK: # %bb.0: # %entry
598 ; CHECK-NEXT: movb $1, %al
599 ; CHECK-NEXT: testb %al, %al
600 ; CHECK-NEXT: jne .LBB6_5
601 ; CHECK-NEXT: # %bb.1: # %while.body.preheader
602 ; CHECK-NEXT: movl %edx, %ecx
603 ; CHECK-NEXT: xorl %esi, %esi
604 ; CHECK-NEXT: .LBB6_2: # %while.body
605 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
606 ; CHECK-NEXT: movslq %esi, %rsi
607 ; CHECK-NEXT: movl (%rdi,%rsi,4), %eax
608 ; CHECK-NEXT: xorl %edx, %edx
609 ; CHECK-NEXT: divl %ecx
610 ; CHECK-NEXT: movl %eax, %edx
611 ; CHECK-NEXT: movl $11, %eax
612 ; CHECK-NEXT: movl %ecx, %r8d
613 ; CHECK-NEXT: cmpl %ecx, %edx
614 ; CHECK-NEXT: ja .LBB6_4
615 ; CHECK-NEXT: # %bb.3: # %while.body
616 ; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1
617 ; CHECK-NEXT: movl $22, %eax
618 ; CHECK-NEXT: movl $22, %r8d
619 ; CHECK-NEXT: .LBB6_4: # %while.body
620 ; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1
621 ; CHECK-NEXT: xorl %edx, %edx
622 ; CHECK-NEXT: divl %r8d
623 ; CHECK-NEXT: movl %edx, (%rdi,%rsi,4)
624 ; CHECK-NEXT: addl $1, %esi
625 ; CHECK-NEXT: cmpl %r9d, %esi
626 ; CHECK-NEXT: ja .LBB6_2
627 ; CHECK-NEXT: .LBB6_5: # %while.end
630 ; CHECK-FORCEALL-LABEL: Transform:
631 ; CHECK-FORCEALL: # %bb.0: # %entry
632 ; CHECK-FORCEALL-NEXT: movb $1, %al
633 ; CHECK-FORCEALL-NEXT: testb %al, %al
634 ; CHECK-FORCEALL-NEXT: jne .LBB6_5
635 ; CHECK-FORCEALL-NEXT: # %bb.1: # %while.body.preheader
636 ; CHECK-FORCEALL-NEXT: movl %edx, %ecx
637 ; CHECK-FORCEALL-NEXT: xorl %esi, %esi
638 ; CHECK-FORCEALL-NEXT: .LBB6_2: # %while.body
639 ; CHECK-FORCEALL-NEXT: # =>This Inner Loop Header: Depth=1
640 ; CHECK-FORCEALL-NEXT: movslq %esi, %rsi
641 ; CHECK-FORCEALL-NEXT: movl (%rdi,%rsi,4), %eax
642 ; CHECK-FORCEALL-NEXT: xorl %edx, %edx
643 ; CHECK-FORCEALL-NEXT: divl %ecx
644 ; CHECK-FORCEALL-NEXT: movl %eax, %edx
645 ; CHECK-FORCEALL-NEXT: movl $11, %eax
646 ; CHECK-FORCEALL-NEXT: movl %ecx, %r8d
647 ; CHECK-FORCEALL-NEXT: cmpl %ecx, %edx
648 ; CHECK-FORCEALL-NEXT: ja .LBB6_4
649 ; CHECK-FORCEALL-NEXT: # %bb.3: # %while.body
650 ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB6_2 Depth=1
651 ; CHECK-FORCEALL-NEXT: movl $22, %eax
652 ; CHECK-FORCEALL-NEXT: movl $22, %r8d
653 ; CHECK-FORCEALL-NEXT: .LBB6_4: # %while.body
654 ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB6_2 Depth=1
655 ; CHECK-FORCEALL-NEXT: xorl %edx, %edx
656 ; CHECK-FORCEALL-NEXT: divl %r8d
657 ; CHECK-FORCEALL-NEXT: movl %edx, (%rdi,%rsi,4)
658 ; CHECK-FORCEALL-NEXT: addl $1, %esi
659 ; CHECK-FORCEALL-NEXT: cmpl %r9d, %esi
660 ; CHECK-FORCEALL-NEXT: ja .LBB6_2
661 ; CHECK-FORCEALL-NEXT: .LBB6_5: # %while.end
662 ; CHECK-FORCEALL-NEXT: retq
664 %cmp10 = icmp ugt i32 0, %n
665 br i1 %cmp10, label %while.body, label %while.end
667 while.body: ; preds = %entry, %while.body
668 %i = phi i32 [ %i_inc, %while.body ], [ 0, %entry ]
669 %arr_i = getelementptr inbounds i32, ptr %arr, i32 %i
670 %x = load i32, ptr %arr_i, align 4
671 %div = udiv i32 %x, %a
672 %cond = icmp ugt i32 %div, %a
673 %condOpp = icmp ule i32 %div, %a
674 %s1 = select i1 %cond, i32 11, i32 22
675 %s2 = select i1 %condOpp, i32 %s1, i32 %a
676 %sum = urem i32 %s1, %s2
677 store i32 %sum, ptr %arr_i, align 4
678 %i_inc = add i32 %i, 1
679 %cmp = icmp ugt i32 %i_inc, %n
680 br i1 %cmp, label %while.body, label %while.end
682 while.end: ; preds = %while.body, %entry
686 ; Test that we will always convert a cmov with a memory operand into a branch,
687 ; even outside of a loop.
688 define i32 @test_cmov_memoperand(i32 %a, i32 %b, i32 %x, ptr %y) #0 {
689 ; CHECK-LABEL: test_cmov_memoperand:
690 ; CHECK: # %bb.0: # %entry
691 ; CHECK-NEXT: movl %edx, %eax
692 ; CHECK-NEXT: cmpl %esi, %edi
693 ; CHECK-NEXT: ja .LBB7_2
694 ; CHECK-NEXT: # %bb.1: # %entry
695 ; CHECK-NEXT: movl (%rcx), %eax
696 ; CHECK-NEXT: .LBB7_2: # %entry
699 ; CHECK-FORCEALL-LABEL: test_cmov_memoperand:
700 ; CHECK-FORCEALL: # %bb.0: # %entry
701 ; CHECK-FORCEALL-NEXT: movl %edx, %eax
702 ; CHECK-FORCEALL-NEXT: cmpl %esi, %edi
703 ; CHECK-FORCEALL-NEXT: ja .LBB7_2
704 ; CHECK-FORCEALL-NEXT: # %bb.1: # %entry
705 ; CHECK-FORCEALL-NEXT: movl (%rcx), %eax
706 ; CHECK-FORCEALL-NEXT: .LBB7_2: # %entry
707 ; CHECK-FORCEALL-NEXT: retq
709 %cond = icmp ugt i32 %a, %b
710 %load = load i32, ptr %y
711 %z = select i1 %cond, i32 %x, i32 %load
715 ; If a cmov instruction is marked as unpredictable, do not convert it to a branch.
716 define i32 @test_cmov_memoperand_unpredictable(i32 %a, i32 %b, i32 %x, ptr %y) #0 {
717 ; CHECK-LABEL: test_cmov_memoperand_unpredictable:
718 ; CHECK: # %bb.0: # %entry
719 ; CHECK-NEXT: movl %edx, %eax
720 ; CHECK-NEXT: cmpl %esi, %edi
721 ; CHECK-NEXT: cmovbel (%rcx), %eax
724 ; CHECK-FORCEALL-LABEL: test_cmov_memoperand_unpredictable:
725 ; CHECK-FORCEALL: # %bb.0: # %entry
726 ; CHECK-FORCEALL-NEXT: movl %edx, %eax
727 ; CHECK-FORCEALL-NEXT: cmpl %esi, %edi
728 ; CHECK-FORCEALL-NEXT: cmovbel (%rcx), %eax
729 ; CHECK-FORCEALL-NEXT: retq
731 %cond = icmp ugt i32 %a, %b
732 %load = load i32, ptr %y
733 %z = select i1 %cond, i32 %x, i32 %load, !unpredictable !0
737 ; Test that we can convert a group of cmovs where only one has a memory
739 define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, ptr %y.ptr) #0 {
740 ; CHECK-LABEL: test_cmov_memoperand_in_group:
741 ; CHECK: # %bb.0: # %entry
742 ; CHECK-NEXT: movl %edx, %eax
743 ; CHECK-NEXT: movl %edx, %r8d
744 ; CHECK-NEXT: cmpl %esi, %edi
745 ; CHECK-NEXT: ja .LBB9_2
746 ; CHECK-NEXT: # %bb.1: # %entry
747 ; CHECK-NEXT: movl (%rcx), %edx
748 ; CHECK-NEXT: movl %edi, %eax
749 ; CHECK-NEXT: movl %esi, %r8d
750 ; CHECK-NEXT: .LBB9_2: # %entry
751 ; CHECK-NEXT: addl %r8d, %eax
752 ; CHECK-NEXT: addl %edx, %eax
755 ; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group:
756 ; CHECK-FORCEALL: # %bb.0: # %entry
757 ; CHECK-FORCEALL-NEXT: movl %edx, %eax
758 ; CHECK-FORCEALL-NEXT: movl %edx, %r8d
759 ; CHECK-FORCEALL-NEXT: cmpl %esi, %edi
760 ; CHECK-FORCEALL-NEXT: ja .LBB9_2
761 ; CHECK-FORCEALL-NEXT: # %bb.1: # %entry
762 ; CHECK-FORCEALL-NEXT: movl (%rcx), %edx
763 ; CHECK-FORCEALL-NEXT: movl %edi, %eax
764 ; CHECK-FORCEALL-NEXT: movl %esi, %r8d
765 ; CHECK-FORCEALL-NEXT: .LBB9_2: # %entry
766 ; CHECK-FORCEALL-NEXT: addl %r8d, %eax
767 ; CHECK-FORCEALL-NEXT: addl %edx, %eax
768 ; CHECK-FORCEALL-NEXT: retq
770 %cond = icmp ugt i32 %a, %b
771 %y = load i32, ptr %y.ptr
772 %z1 = select i1 %cond, i32 %x, i32 %a
773 %z2 = select i1 %cond, i32 %x, i32 %y
774 %z3 = select i1 %cond, i32 %x, i32 %b
775 %s1 = add i32 %z1, %z2
776 %s2 = add i32 %s1, %z3
780 ; Same as before but with operands reversed in the select with a load.
781 define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, ptr %y.ptr) #0 {
782 ; CHECK-LABEL: test_cmov_memoperand_in_group2:
783 ; CHECK: # %bb.0: # %entry
784 ; CHECK-NEXT: movl %edx, %eax
785 ; CHECK-NEXT: movl %edx, %r8d
786 ; CHECK-NEXT: cmpl %esi, %edi
787 ; CHECK-NEXT: jbe .LBB10_2
788 ; CHECK-NEXT: # %bb.1: # %entry
789 ; CHECK-NEXT: movl (%rcx), %edx
790 ; CHECK-NEXT: movl %edi, %eax
791 ; CHECK-NEXT: movl %esi, %r8d
792 ; CHECK-NEXT: .LBB10_2: # %entry
793 ; CHECK-NEXT: addl %r8d, %eax
794 ; CHECK-NEXT: addl %edx, %eax
797 ; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group2:
798 ; CHECK-FORCEALL: # %bb.0: # %entry
799 ; CHECK-FORCEALL-NEXT: movl %edx, %eax
800 ; CHECK-FORCEALL-NEXT: movl %edx, %r8d
801 ; CHECK-FORCEALL-NEXT: cmpl %esi, %edi
802 ; CHECK-FORCEALL-NEXT: jbe .LBB10_2
803 ; CHECK-FORCEALL-NEXT: # %bb.1: # %entry
804 ; CHECK-FORCEALL-NEXT: movl (%rcx), %edx
805 ; CHECK-FORCEALL-NEXT: movl %edi, %eax
806 ; CHECK-FORCEALL-NEXT: movl %esi, %r8d
807 ; CHECK-FORCEALL-NEXT: .LBB10_2: # %entry
808 ; CHECK-FORCEALL-NEXT: addl %r8d, %eax
809 ; CHECK-FORCEALL-NEXT: addl %edx, %eax
810 ; CHECK-FORCEALL-NEXT: retq
812 %cond = icmp ugt i32 %a, %b
813 %y = load i32, ptr %y.ptr
814 %z2 = select i1 %cond, i32 %a, i32 %x
815 %z1 = select i1 %cond, i32 %y, i32 %x
816 %z3 = select i1 %cond, i32 %b, i32 %x
817 %s1 = add i32 %z1, %z2
818 %s2 = add i32 %s1, %z3
822 ; Test that we don't convert a group of cmovs with conflicting directions of
824 define i32 @test_cmov_memoperand_conflicting_dir(i32 %a, i32 %b, i32 %x, ptr %y1.ptr, ptr %y2.ptr) #0 {
825 ; CHECK-LABEL: test_cmov_memoperand_conflicting_dir:
826 ; CHECK: # %bb.0: # %entry
827 ; CHECK-NEXT: cmpl %esi, %edi
828 ; CHECK-NEXT: movl (%rcx), %eax
829 ; CHECK-NEXT: cmoval %edx, %eax
830 ; CHECK-NEXT: cmoval (%r8), %edx
831 ; CHECK-NEXT: addl %edx, %eax
834 ; CHECK-FORCEALL-LABEL: test_cmov_memoperand_conflicting_dir:
835 ; CHECK-FORCEALL: # %bb.0: # %entry
836 ; CHECK-FORCEALL-NEXT: cmpl %esi, %edi
837 ; CHECK-FORCEALL-NEXT: movl (%rcx), %eax
838 ; CHECK-FORCEALL-NEXT: cmoval %edx, %eax
839 ; CHECK-FORCEALL-NEXT: cmoval (%r8), %edx
840 ; CHECK-FORCEALL-NEXT: addl %edx, %eax
841 ; CHECK-FORCEALL-NEXT: retq
843 %cond = icmp ugt i32 %a, %b
844 %y1 = load i32, ptr %y1.ptr
845 %y2 = load i32, ptr %y2.ptr
846 %z1 = select i1 %cond, i32 %x, i32 %y1
847 %z2 = select i1 %cond, i32 %y2, i32 %x
848 %s1 = add i32 %z1, %z2
852 ; Test that we can convert a group of cmovs where only one has a memory
853 ; operand and where that memory operand's registers come from a prior cmov in
855 define i32 @test_cmov_memoperand_in_group_reuse_for_addr(i32 %a, i32 %b, ptr %x, ptr %y) #0 {
856 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr:
857 ; CHECK: # %bb.0: # %entry
858 ; CHECK-NEXT: movl %edi, %eax
859 ; CHECK-NEXT: cmpl %esi, %edi
860 ; CHECK-NEXT: ja .LBB12_2
861 ; CHECK-NEXT: # %bb.1: # %entry
862 ; CHECK-NEXT: movl (%rcx), %eax
863 ; CHECK-NEXT: .LBB12_2: # %entry
866 ; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group_reuse_for_addr:
867 ; CHECK-FORCEALL: # %bb.0: # %entry
868 ; CHECK-FORCEALL-NEXT: movl %edi, %eax
869 ; CHECK-FORCEALL-NEXT: cmpl %esi, %edi
870 ; CHECK-FORCEALL-NEXT: ja .LBB12_2
871 ; CHECK-FORCEALL-NEXT: # %bb.1: # %entry
872 ; CHECK-FORCEALL-NEXT: movl (%rcx), %eax
873 ; CHECK-FORCEALL-NEXT: .LBB12_2: # %entry
874 ; CHECK-FORCEALL-NEXT: retq
876 %cond = icmp ugt i32 %a, %b
877 %p = select i1 %cond, ptr %x, ptr %y
878 %load = load i32, ptr %p
879 %z = select i1 %cond, i32 %a, i32 %load
883 ; Test that we can convert a group of two cmovs with memory operands where one
884 ; uses the result of the other as part of the address.
; Here the pointer select's false operand is itself loaded from %y, and the
; selected pointer is loaded again to produce the i32 result. Both prefixes
; expect the two dependent loads (`movq (%rcx), %rax` then `movl (%rax), %eax`)
; to sit together in the block skipped by `ja`.
885 define i32 @test_cmov_memoperand_in_group_reuse_for_addr2(i32 %a, i32 %b, ptr %x, ptr %y) #0 {
886 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2:
887 ; CHECK: # %bb.0: # %entry
888 ; CHECK-NEXT: movl %edi, %eax
889 ; CHECK-NEXT: cmpl %esi, %edi
890 ; CHECK-NEXT: ja .LBB13_2
891 ; CHECK-NEXT: # %bb.1: # %entry
892 ; CHECK-NEXT: movq (%rcx), %rax
893 ; CHECK-NEXT: movl (%rax), %eax
894 ; CHECK-NEXT: .LBB13_2: # %entry
897 ; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2:
898 ; CHECK-FORCEALL: # %bb.0: # %entry
899 ; CHECK-FORCEALL-NEXT: movl %edi, %eax
900 ; CHECK-FORCEALL-NEXT: cmpl %esi, %edi
901 ; CHECK-FORCEALL-NEXT: ja .LBB13_2
902 ; CHECK-FORCEALL-NEXT: # %bb.1: # %entry
903 ; CHECK-FORCEALL-NEXT: movq (%rcx), %rax
904 ; CHECK-FORCEALL-NEXT: movl (%rax), %eax
905 ; CHECK-FORCEALL-NEXT: .LBB13_2: # %entry
906 ; CHECK-FORCEALL-NEXT: retq
; Dependency chain: %load1 -> %p -> %load2 -> %z, with %p and %z both selected
; on the same %cond.
908 %cond = icmp ugt i32 %a, %b
909 %load1 = load ptr, ptr %y
910 %p = select i1 %cond, ptr %x, ptr %load1
911 %load2 = load i32, ptr %p
912 %z = select i1 %cond, i32 %a, i32 %load2
916 ; Test that we can convert a group of cmovs where only one has a memory
917 ; operand and where that memory operand's registers come from a prior cmov and
918 ; where that cmov gets *its* input from a prior cmov in the group.
; Three selects share %cond: %p feeds %p2, %p2 is the address of the load, and
; the loaded value feeds %r. Both prefixes expect a single `ja` over a single
; load from (%rcx); when %cond is false both %p and %p2 resolve to %y.
919 define i32 @test_cmov_memoperand_in_group_reuse_for_addr3(i32 %a, i32 %b, ptr %x, ptr %y, ptr %z) #0 {
920 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3:
921 ; CHECK: # %bb.0: # %entry
922 ; CHECK-NEXT: movl %edi, %eax
923 ; CHECK-NEXT: cmpl %esi, %edi
924 ; CHECK-NEXT: ja .LBB14_2
925 ; CHECK-NEXT: # %bb.1: # %entry
926 ; CHECK-NEXT: movl (%rcx), %eax
927 ; CHECK-NEXT: .LBB14_2: # %entry
930 ; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3:
931 ; CHECK-FORCEALL: # %bb.0: # %entry
932 ; CHECK-FORCEALL-NEXT: movl %edi, %eax
933 ; CHECK-FORCEALL-NEXT: cmpl %esi, %edi
934 ; CHECK-FORCEALL-NEXT: ja .LBB14_2
935 ; CHECK-FORCEALL-NEXT: # %bb.1: # %entry
936 ; CHECK-FORCEALL-NEXT: movl (%rcx), %eax
937 ; CHECK-FORCEALL-NEXT: .LBB14_2: # %entry
938 ; CHECK-FORCEALL-NEXT: retq
; Dependency chain: %p -> %p2 -> %load -> %r.
940 %cond = icmp ugt i32 %a, %b
941 %p = select i1 %cond, ptr %x, ptr %y
942 %p2 = select i1 %cond, ptr %z, ptr %p
943 %load = load i32, ptr %p2
944 %r = select i1 %cond, i32 %a, i32 %load
; External pointer-valued globals read by @test_memoperand_loop.
948 @begin = external global ptr
949 @end = external global ptr
; Loop test with two pointer selects per iteration. Each select compares a
; pointer advanced by two i32 elements (the `addq $8` in the checks) against
; %end using an unsigned comparison, then stores %data through the chosen
; pointer. The first select's false operand is a load of @begin performed
; inside the loop; the second select's false operand is the value of @begin
; loaded ahead of the loop. Both prefixes expect each select to become a `ja`
; over a one-instruction block: `movq (%rax), %r8` for the first and
; `movq %rcx, %r8` for the second.
951 define void @test_memoperand_loop(i32 %data) #0 {
952 ; CHECK-LABEL: test_memoperand_loop:
953 ; CHECK: # %bb.0: # %entry
954 ; CHECK-NEXT: movq begin@GOTPCREL(%rip), %rax
955 ; CHECK-NEXT: movq (%rax), %rcx
956 ; CHECK-NEXT: movq end@GOTPCREL(%rip), %rdx
957 ; CHECK-NEXT: movq (%rdx), %rdx
958 ; CHECK-NEXT: xorl %esi, %esi
959 ; CHECK-NEXT: movq %rcx, %r8
960 ; CHECK-NEXT: .LBB15_1: # %loop.body
961 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
962 ; CHECK-NEXT: addq $8, %r8
963 ; CHECK-NEXT: cmpq %rdx, %r8
964 ; CHECK-NEXT: ja .LBB15_3
965 ; CHECK-NEXT: # %bb.2: # %loop.body
966 ; CHECK-NEXT: # in Loop: Header=BB15_1 Depth=1
967 ; CHECK-NEXT: movq (%rax), %r8
968 ; CHECK-NEXT: .LBB15_3: # %loop.body
969 ; CHECK-NEXT: # in Loop: Header=BB15_1 Depth=1
970 ; CHECK-NEXT: movl %edi, (%r8)
971 ; CHECK-NEXT: addq $8, %r8
972 ; CHECK-NEXT: cmpq %rdx, %r8
973 ; CHECK-NEXT: ja .LBB15_5
974 ; CHECK-NEXT: # %bb.4: # %loop.body
975 ; CHECK-NEXT: # in Loop: Header=BB15_1 Depth=1
976 ; CHECK-NEXT: movq %rcx, %r8
977 ; CHECK-NEXT: .LBB15_5: # %loop.body
978 ; CHECK-NEXT: # in Loop: Header=BB15_1 Depth=1
979 ; CHECK-NEXT: movl %edi, (%r8)
980 ; CHECK-NEXT: addl $1, %esi
981 ; CHECK-NEXT: cmpl $1024, %esi # imm = 0x400
982 ; CHECK-NEXT: jl .LBB15_1
983 ; CHECK-NEXT: # %bb.6: # %exit
986 ; CHECK-FORCEALL-LABEL: test_memoperand_loop:
987 ; CHECK-FORCEALL: # %bb.0: # %entry
988 ; CHECK-FORCEALL-NEXT: movq begin@GOTPCREL(%rip), %rax
989 ; CHECK-FORCEALL-NEXT: movq (%rax), %rcx
990 ; CHECK-FORCEALL-NEXT: movq end@GOTPCREL(%rip), %rdx
991 ; CHECK-FORCEALL-NEXT: movq (%rdx), %rdx
992 ; CHECK-FORCEALL-NEXT: xorl %esi, %esi
993 ; CHECK-FORCEALL-NEXT: movq %rcx, %r8
994 ; CHECK-FORCEALL-NEXT: .LBB15_1: # %loop.body
995 ; CHECK-FORCEALL-NEXT: # =>This Inner Loop Header: Depth=1
996 ; CHECK-FORCEALL-NEXT: addq $8, %r8
997 ; CHECK-FORCEALL-NEXT: cmpq %rdx, %r8
998 ; CHECK-FORCEALL-NEXT: ja .LBB15_3
999 ; CHECK-FORCEALL-NEXT: # %bb.2: # %loop.body
1000 ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB15_1 Depth=1
1001 ; CHECK-FORCEALL-NEXT: movq (%rax), %r8
1002 ; CHECK-FORCEALL-NEXT: .LBB15_3: # %loop.body
1003 ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB15_1 Depth=1
1004 ; CHECK-FORCEALL-NEXT: movl %edi, (%r8)
1005 ; CHECK-FORCEALL-NEXT: addq $8, %r8
1006 ; CHECK-FORCEALL-NEXT: cmpq %rdx, %r8
1007 ; CHECK-FORCEALL-NEXT: ja .LBB15_5
1008 ; CHECK-FORCEALL-NEXT: # %bb.4: # %loop.body
1009 ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB15_1 Depth=1
1010 ; CHECK-FORCEALL-NEXT: movq %rcx, %r8
1011 ; CHECK-FORCEALL-NEXT: .LBB15_5: # %loop.body
1012 ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB15_1 Depth=1
1013 ; CHECK-FORCEALL-NEXT: movl %edi, (%r8)
1014 ; CHECK-FORCEALL-NEXT: addl $1, %esi
1015 ; CHECK-FORCEALL-NEXT: cmpl $1024, %esi # imm = 0x400
1016 ; CHECK-FORCEALL-NEXT: jl .LBB15_1
1017 ; CHECK-FORCEALL-NEXT: # %bb.6: # %exit
1018 ; CHECK-FORCEALL-NEXT: retq
; Loaded ahead of the loop phis. %begin seeds %phi.ptr and is the false operand
; of %dst2; %end is the bound used by both comparisons.
1020 %begin = load ptr, ptr @begin, align 8
1021 %end = load ptr, ptr @end, align 8
; Induction state: %phi.iv counts from 0, %phi.ptr carries %dst2 from the
; previous iteration.
1024 %phi.iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.body ]
1025 %phi.ptr = phi ptr [ %begin, %entry ], [ %dst2, %loop.body ]
1026 %gep1 = getelementptr inbounds i32, ptr%phi.ptr, i64 2
1027 %cmp1 = icmp ugt ptr %gep1, %end
; Second load of @begin; its only use is the false operand of %dst1.
1028 %begin_dup = load ptr, ptr @begin, align 8
1029 %dst1 = select i1 %cmp1, ptr %gep1, ptr %begin_dup
1030 store i32 %data, ptr%dst1, align 4
1031 %gep2 = getelementptr inbounds i32, ptr%dst1, i64 2
1032 %cmp2 = icmp ugt ptr %gep2, %end
1033 %dst2 = select i1 %cmp2, ptr %gep2, ptr %begin
1034 store i32 %data, ptr%dst2, align 4
; Continue while the incremented counter is below 1024 (signed compare).
1035 %iv.next = add i32 %phi.iv, 1
1036 %cond = icmp slt i32 %iv.next, 1024
1037 br i1 %cond, label %loop.body, label %exit
; Attribute group #0, referenced by the function definitions above: sets both
; "target-cpu" and "tune-cpu" to "x86-64".
1042 attributes #0 = {"target-cpu"="x86-64" "tune-cpu"="x86-64"}