test/CodeGen/X86/cmovcmov.ll

   1 ; RUN: llc < %s -asm-verbose=false -mtriple=x86_64-unknown-linux | FileCheck %s --check-prefix=CHECK --check-prefix=CMOV
   2 ; RUN: llc < %s -asm-verbose=false -mtriple=i686-unknown-linux | FileCheck %s --check-prefix=CHECK --check-prefix=NOCMOV
   3
   4 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
     ; NOTE(review): "m:o" requests Mach-O name mangling, while both RUN lines
     ; use *-unknown-linux triples -- confirm this mismatch is intentional.
   5
   6 ; Test 2xCMOV patterns exposed after legalization.
   7 ; One way to do that is with (select (fcmp une/oeq)), which gets
   8 ; legalized to setp/setne.
   9
  10 ; CHECK-LABEL: test_select_fcmp_oeq_i32:
  11
  12 ; CMOV-NEXT: movl  %edi, %eax
  13 ; CMOV-NEXT: ucomiss  %xmm1, %xmm0
  14 ; CMOV-NEXT: cmovnel  %esi, %eax
  15 ; CMOV-NEXT: cmovpl  %esi, %eax
  16 ; CMOV-NEXT: retq
  17
  18 ; NOCMOV-NEXT:  flds  8(%esp)
  19 ; NOCMOV-NEXT:  flds  4(%esp)
  20 ; NOCMOV-NEXT:  fucompp
  21 ; NOCMOV-NEXT:  fnstsw  %ax
  22 ; NOCMOV-NEXT:  sahf
  23 ; NOCMOV-NEXT:  leal  16(%esp), %eax
  24 ; NOCMOV-NEXT:  jne  [[TBB:.LBB[0-9_]+]]
  25 ; NOCMOV-NEXT:  jp  [[TBB]]
  26 ; NOCMOV-NEXT:  leal  12(%esp), %eax
  27 ; NOCMOV-NEXT:[[TBB]]:
  28 ; NOCMOV-NEXT:  movl  (%eax), %eax
  29 ; NOCMOV-NEXT:  retl
  30 define i32 @test_select_fcmp_oeq_i32(float %a, float %b, i32 %c, i32 %d) #0 {
     ; Yields %c when %a and %b compare ordered-and-equal (no NaN operand),
     ; otherwise %d.
  31 entry:
  32   %cmp = fcmp oeq float %a, %b
     ; i32 select on the compare result. The expectations above require a
     ; cmovne+cmovp pair on x86_64 and a jne+jp branch pair on i686.
  33   %r = select i1 %cmp, i32 %c, i32 %d
  34   ret i32 %r
  35 }
  36
  37 ; CHECK-LABEL: test_select_fcmp_oeq_i64:
  38
  39 ; CMOV-NEXT:   movq  %rdi, %rax
  40 ; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
  41 ; CMOV-NEXT:   cmovneq  %rsi, %rax
  42 ; CMOV-NEXT:   cmovpq  %rsi, %rax
  43 ; CMOV-NEXT:   retq
  44
  45 ; NOCMOV-NEXT:   flds  8(%esp)
  46 ; NOCMOV-NEXT:   flds  4(%esp)
  47 ; NOCMOV-NEXT:   fucompp
  48 ; NOCMOV-NEXT:   fnstsw  %ax
  49 ; NOCMOV-NEXT:   sahf
  50 ; NOCMOV-NEXT:   leal  20(%esp), %ecx
  51 ; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
  52 ; NOCMOV-NEXT:   jp  [[TBB]]
  53 ; NOCMOV-NEXT:   leal  12(%esp), %ecx
  54 ; NOCMOV-NEXT: [[TBB]]:
  55 ; NOCMOV-NEXT:   movl  (%ecx), %eax
  56 ; NOCMOV-NEXT:   movl  4(%ecx), %edx
  57 ; NOCMOV-NEXT:   retl
  58 define i64 @test_select_fcmp_oeq_i64(float %a, float %b, i64 %c, i64 %d) #0 {
     ; 64-bit variant of the oeq select: yields %c when %a and %b compare
     ; ordered-and-equal, otherwise %d.
  59 entry:
  60   %cmp = fcmp oeq float %a, %b
     ; On x86_64 the expectations above use the 64-bit cmovne+cmovp pair; on
     ; i686 they pick an argument address with jne+jp and load two 32-bit halves.
  61   %r = select i1 %cmp, i64 %c, i64 %d
  62   ret i64 %r
  63 }
  64
  65 ; CHECK-LABEL: test_select_fcmp_une_i64:
  66
  67 ; CMOV-NEXT:   movq  %rsi, %rax
  68 ; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
  69 ; CMOV-NEXT:   cmovneq  %rdi, %rax
  70 ; CMOV-NEXT:   cmovpq  %rdi, %rax
  71 ; CMOV-NEXT:   retq
  72
  73 ; NOCMOV-NEXT:   flds  8(%esp)
  74 ; NOCMOV-NEXT:   flds  4(%esp)
  75 ; NOCMOV-NEXT:   fucompp
  76 ; NOCMOV-NEXT:   fnstsw  %ax
  77 ; NOCMOV-NEXT:   sahf
  78 ; NOCMOV-NEXT:   leal  12(%esp), %ecx
  79 ; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
  80 ; NOCMOV-NEXT:   jp  [[TBB]]
  81 ; NOCMOV-NEXT:   leal  20(%esp), %ecx
  82 ; NOCMOV-NEXT: [[TBB]]:
  83 ; NOCMOV-NEXT:   movl  (%ecx), %eax
  84 ; NOCMOV-NEXT:   movl  4(%ecx), %edx
  85 ; NOCMOV-NEXT:   retl
  86 define i64 @test_select_fcmp_une_i64(float %a, float %b, i64 %c, i64 %d) #0 {
     ; Inverse predicate of the oeq i64 test: yields %c when %a and %b compare
     ; unordered-or-not-equal, otherwise %d.
  87 entry:
  88   %cmp = fcmp une float %a, %b
     ; The expectations above keep the same cmovne+cmovp (or jne+jp) pair as the
     ; oeq i64 test but with the two value operands swapped.
  89   %r = select i1 %cmp, i64 %c, i64 %d
  90   ret i64 %r
  91 }
  92
  93 ; CHECK-LABEL: test_select_fcmp_oeq_f64:
  94
  95 ; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
  96 ; CMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
  97 ; CMOV-NEXT:   jp  [[TBB]]
  98 ; CMOV-NEXT:   movaps  %xmm2, %xmm3
  99 ; CMOV-NEXT: [[TBB]]:
 100 ; CMOV-NEXT:   movaps  %xmm3, %xmm0
 101 ; CMOV-NEXT:   retq
 102
 103 ; NOCMOV-NEXT:   flds  8(%esp)
 104 ; NOCMOV-NEXT:   flds  4(%esp)
 105 ; NOCMOV-NEXT:   fucompp
 106 ; NOCMOV-NEXT:   fnstsw  %ax
 107 ; NOCMOV-NEXT:   sahf
 108 ; NOCMOV-NEXT:   leal  20(%esp), %eax
 109 ; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
 110 ; NOCMOV-NEXT:   jp  [[TBB]]
 111 ; NOCMOV-NEXT:   leal  12(%esp), %eax
 112 ; NOCMOV-NEXT: [[TBB]]:
 113 ; NOCMOV-NEXT:   fldl  (%eax)
 114 ; NOCMOV-NEXT:   retl
 115 define double @test_select_fcmp_oeq_f64(float %a, float %b, double %c, double %d) #0 {
     ; Floating-point result variant: yields %c when %a and %b compare
     ; ordered-and-equal, otherwise %d.
 116 entry:
 117   %cmp = fcmp oeq float %a, %b
     ; Here the expectations above use a jne+jp branch pair on both targets;
     ; no conditional-move instructions are expected for the double select.
 118   %r = select i1 %cmp, double %c, double %d
 119   ret double %r
 120 }
 121
 122 ; CHECK-LABEL: test_select_fcmp_oeq_v4i32:
 123
 124 ; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
 125 ; CMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
 126 ; CMOV-NEXT:   jp  [[TBB]]
 127 ; CMOV-NEXT:   movaps  %xmm2, %xmm3
 128 ; CMOV-NEXT: [[TBB]]:
 129 ; CMOV-NEXT:   movaps  %xmm3, %xmm0
 130 ; CMOV-NEXT:   retq
 131
 132 ; NOCMOV-NEXT:   pushl  %edi
 133 ; NOCMOV-NEXT:   pushl  %esi
 134 ; NOCMOV-NEXT:   flds  20(%esp)
 135 ; NOCMOV-NEXT:   flds  16(%esp)
 136 ; NOCMOV-NEXT:   fucompp
 137 ; NOCMOV-NEXT:   fnstsw  %ax
 138 ; NOCMOV-NEXT:   sahf
 139 ; NOCMOV-NEXT:   leal  40(%esp), %eax
 140 ; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
 141 ; NOCMOV-NEXT:   jp  [[TBB]]
 142 ; NOCMOV-NEXT:   leal  24(%esp), %eax
 143 ; NOCMOV-NEXT: [[TBB]]:
 144 ; NOCMOV-NEXT:   movl  (%eax), %ecx
 145 ; NOCMOV-NEXT:   leal  44(%esp), %edx
 146 ; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
 147 ; NOCMOV-NEXT:   jp  [[TBB]]
 148 ; NOCMOV-NEXT:   leal  28(%esp), %edx
 149 ; NOCMOV-NEXT: [[TBB]]:
 150 ; NOCMOV-NEXT:   movl  12(%esp), %eax
 151 ; NOCMOV-NEXT:   movl  (%edx), %edx
 152 ; NOCMOV-NEXT:   leal  48(%esp), %esi
 153 ; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
 154 ; NOCMOV-NEXT:   jp  [[TBB]]
 155 ; NOCMOV-NEXT:   leal  32(%esp), %esi
 156 ; NOCMOV-NEXT: [[TBB]]:
 157 ; NOCMOV-NEXT:   movl  (%esi), %esi
 158 ; NOCMOV-NEXT:   leal  52(%esp), %edi
 159 ; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
 160 ; NOCMOV-NEXT:   jp  [[TBB]]
 161 ; NOCMOV-NEXT:   leal  36(%esp), %edi
 162 ; NOCMOV-NEXT: [[TBB]]:
 163 ; NOCMOV-NEXT:   movl  (%edi), %edi
 164 ; NOCMOV-NEXT:   movl  %edi, 12(%eax)
 165 ; NOCMOV-NEXT:   movl  %esi, 8(%eax)
 166 ; NOCMOV-NEXT:   movl  %edx, 4(%eax)
 167 ; NOCMOV-NEXT:   movl  %ecx, (%eax)
 168 ; NOCMOV-NEXT:   popl  %esi
 169 ; NOCMOV-NEXT:   popl  %edi
 170 ; NOCMOV-NEXT:   retl  $4
 171 define <4 x i32> @test_select_fcmp_oeq_v4i32(float %a, float %b, <4 x i32> %c, <4 x i32> %d) #0 {
     ; Vector result variant: a single scalar compare picks the whole <4 x i32>
     ; value, %c when ordered-and-equal and %d otherwise.
 172 entry:
 173   %cmp = fcmp oeq float %a, %b
     ; The x86_64 expectations above use one jne+jp pair and movaps; the i686
     ; expectations repeat the jne+jp pair four times, once per 32-bit element.
 174   %r = select i1 %cmp, <4 x i32> %c, <4 x i32> %d
 175   ret <4 x i32> %r
 176 }
 177
 178 ; Also make sure we catch the original code-sequence of interest:
 179
 180 ; CMOV: [[ONE_F32_LCPI:.LCPI.*]]:
 181 ; CMOV-NEXT:   .long  1065353216
 182
 183 ; CHECK-LABEL: test_zext_fcmp_une:
 184 ; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
 185 ; CMOV-NEXT:   movss  [[ONE_F32_LCPI]](%rip), %xmm0
 186 ; CMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
 187 ; CMOV-NEXT:   jp  [[TBB]]
 188 ; CMOV-NEXT:   xorps  %xmm0, %xmm0
 189 ; CMOV-NEXT: [[TBB]]:
 190 ; CMOV-NEXT:   retq
 191
 192 ; NOCMOV:        jne
 193 ; NOCMOV-NEXT:   jp
 194 define float @test_zext_fcmp_une(float %a, float %b) #0 {
     ; Converts the une compare result to a float: 1.0 when %a and %b compare
     ; unordered-or-not-equal, 0.0 otherwise.
 195 entry:
 196   %cmp = fcmp une float %a, %b
     ; %conv1 is 0 or 1, so the signed conversion below yields 0.0 or 1.0.
 197   %conv1 = zext i1 %cmp to i32
     ; The x86_64 expectations above materialize 1.0 from a constant-pool entry
     ; (.long 1065353216) and 0.0 with xorps, selected by a jne+jp pair.
 198   %conv2 = sitofp i32 %conv1 to float
 199   ret float %conv2
 200 }
 201
 202 ; CMOV: [[ONE_F32_LCPI:.LCPI.*]]:
 203 ; CMOV-NEXT:   .long  1065353216
 204
 205 ; CHECK-LABEL: test_zext_fcmp_oeq:
 206 ; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
 207 ; CMOV-NEXT:   xorps  %xmm0, %xmm0
 208 ; CMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
 209 ; CMOV-NEXT:   jp  [[TBB]]
 210 ; CMOV-NEXT:   movss  [[ONE_F32_LCPI]](%rip), %xmm0
 211 ; CMOV-NEXT: [[TBB]]:
 212 ; CMOV-NEXT:   retq
 213
 214 ; NOCMOV:        jne
 215 ; NOCMOV-NEXT:   jp
 216 define float @test_zext_fcmp_oeq(float %a, float %b) #0 {
     ; Converts the oeq compare result to a float: 1.0 when %a and %b compare
     ; ordered-and-equal, 0.0 otherwise.
 217 entry:
 218   %cmp = fcmp oeq float %a, %b
     ; %conv1 is 0 or 1, so the signed conversion below yields 0.0 or 1.0.
 219   %conv1 = zext i1 %cmp to i32
     ; Compared with the une variant, the x86_64 expectations above swap the
     ; order of the xorps (0.0) and the constant-pool load (1.0).
 220   %conv2 = sitofp i32 %conv1 to float
 221   ret float %conv2
 222 }
 223
 224 attributes #0 = { nounwind }  ; attribute group referenced by the fcmp tests above
 225
 226 @g8 = global i8 0  ; i8 global written by the volatile store in no_cascade_opt
 227
 228 ; The following test failed because llvm had a bug where a structure like:
 229 ;
 230 ; %12 = CMOV_GR8 %7, %11 ... (lt)
 231 ; %13 = CMOV_GR8 %12, %11 ... (gt)
 232 ;
 233 ; was lowered incorrectly.
 234 ;
 235 ; The first two cmovs got expanded to:
 236 ; %bb.0:
 237 ;   JL_1 %bb.9
 238 ; %bb.7:
 239 ;   JG_1 %bb.9
 240 ; %bb.8:
 241 ; %bb.9:
 242 ;   %12 = phi(%7, %bb.8, %11, %bb.0, %12, %bb.7)
 243 ;   %13 = COPY %12
 244 ; This is invalid: %12 and %13 are distinct values, so %13 cannot be a plain copy of %12.
 245
 246 ; CHECK-LABEL: no_cascade_opt:
 247 ; CMOV-DAG: cmpl %edx, %esi
 248 ; CMOV-DAG: movb $20, %al
 249 ; CMOV-DAG: movb $20, %dl
 250 ; CMOV:   jge [[BB2:.LBB[0-9_]+]]
 251 ; CMOV:   jle [[BB3:.LBB[0-9_]+]]
 252 ; CMOV: [[BB0:.LBB[0-9_]+]]
 253 ; CMOV:   testl %edi, %edi
 254 ; CMOV:   jne [[BB4:.LBB[0-9_]+]]
 255 ; CMOV: [[BB1:.LBB[0-9_]+]]
 256 ; CMOV:   movb %al, g8(%rip)
 257 ; CMOV:   retq
 258 ; CMOV: [[BB2]]:
 259 ; CMOV:   movl %ecx, %edx
 260 ; CMOV:   jg [[BB0]]
 261 ; CMOV: [[BB3]]:
 262 ; CMOV:   movl %edx, %eax
 263 ; CMOV:   testl %edi, %edi
 264 ; CMOV:   je [[BB1]]
 265 ; CMOV: [[BB4]]:
 266 ; CMOV:   movl %edx, %eax
 267 ; CMOV:   movb %al, g8(%rip)
 268 ; CMOV:   retq
 269 define void @no_cascade_opt(i32 %v0, i32 %v1, i32 %v2, i32 %v3) {
     ; Regression input for the cascaded-cmov miscompile described above:
     ; three chained i8 selects whose final result is stored to @g8.
 270 entry:
 271   %c0 = icmp eq i32 %v0, 0
     ; %c1 and %c2 are the signed less-than / greater-than compares of the same
     ; operand pair (the "lt" and "gt" conditions in the description above).
 272   %c1 = icmp slt i32 %v1, %v2
 273   %c2 = icmp sgt i32 %v1, %v2
 274   %trunc = trunc i32 %v3 to i8
     ; %sel1 shares its true value (20) with %sel0 and takes %sel0 as its false
     ; value, mirroring the two-cmov structure in the description above.
 275   %sel0 = select i1 %c1, i8 20, i8 %trunc
 276   %sel1 = select i1 %c2, i8 20, i8 %sel0
     ; Both %sel0 and %sel1 are used here, so they must remain distinct values.
 277   %sel2 = select i1 %c0, i8 %sel1, i8 %sel0
 278   store volatile i8 %sel2, i8* @g8
 279   ret void
 280 }