test/CodeGen/X86/urem-seteq.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X86
   3 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X64
   4
   5 ; This tests the BuildREMEqFold optimization with UREM, i32, odd divisor, SETEQ.
   6 ; The corresponding pseudocode is:
   7 ; Q <- [N * multInv(5, 2^32)] <=> [N * 0xCCCCCCCD] <=> [N * (-858993459)]
   8 ; res <- [Q <= (2^32 - 1) / 5] <=> [Q <= 858993459] <=> [Q < 858993460]
   9 define i32 @test_urem_odd(i32 %X) nounwind readnone {
  10 ; X86-LABEL: test_urem_odd:
  11 ; X86:       # %bb.0:
  12 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
  13 ; X86-NEXT:    movl $-858993459, %edx # imm = 0xCCCCCCCD
  14 ; X86-NEXT:    movl %ecx, %eax
  15 ; X86-NEXT:    mull %edx
  16 ; X86-NEXT:    shrl $2, %edx
  17 ; X86-NEXT:    leal (%edx,%edx,4), %edx
  18 ; X86-NEXT:    xorl %eax, %eax
  19 ; X86-NEXT:    cmpl %edx, %ecx
  20 ; X86-NEXT:    sete %al
  21 ; X86-NEXT:    retl
  22 ;
  23 ; X64-LABEL: test_urem_odd:
  24 ; X64:       # %bb.0:
  25 ; X64-NEXT:    movl %edi, %eax
  26 ; X64-NEXT:    movl $3435973837, %ecx # imm = 0xCCCCCCCD
  27 ; X64-NEXT:    imulq %rax, %rcx
  28 ; X64-NEXT:    shrq $34, %rcx
  29 ; X64-NEXT:    leal (%rcx,%rcx,4), %ecx
  30 ; X64-NEXT:    xorl %eax, %eax
  31 ; X64-NEXT:    cmpl %ecx, %edi
  32 ; X64-NEXT:    sete %al
  33 ; X64-NEXT:    retq
  34   %urem = urem i32 %X, 5
  35   %cmp = icmp eq i32 %urem, 0
  36   %ret = zext i1 %cmp to i32
  37   ret i32 %ret
  38 }
  39
  40 ; This is like test_urem_odd, except the divisor has bit 30 set.
  41 define i32 @test_urem_odd_bit30(i32 %X) nounwind readnone {
  42 ; X86-LABEL: test_urem_odd_bit30:
  43 ; X86:       # %bb.0:
  44 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
  45 ; X86-NEXT:    movl $-11, %edx
  46 ; X86-NEXT:    movl %ecx, %eax
  47 ; X86-NEXT:    mull %edx
  48 ; X86-NEXT:    shrl $30, %edx
  49 ; X86-NEXT:    imull $1073741827, %edx, %edx # imm = 0x40000003
  50 ; X86-NEXT:    xorl %eax, %eax
  51 ; X86-NEXT:    cmpl %edx, %ecx
  52 ; X86-NEXT:    sete %al
  53 ; X86-NEXT:    retl
  54 ;
  55 ; X64-LABEL: test_urem_odd_bit30:
  56 ; X64:       # %bb.0:
  57 ; X64-NEXT:    movl %edi, %eax
  58 ; X64-NEXT:    movl $4294967285, %ecx # imm = 0xFFFFFFF5
  59 ; X64-NEXT:    imulq %rax, %rcx
  60 ; X64-NEXT:    shrq $62, %rcx
  61 ; X64-NEXT:    imull $1073741827, %ecx, %ecx # imm = 0x40000003
  62 ; X64-NEXT:    xorl %eax, %eax
  63 ; X64-NEXT:    cmpl %ecx, %edi
  64 ; X64-NEXT:    sete %al
  65 ; X64-NEXT:    retq
  66   %urem = urem i32 %X, 1073741827
  67   %cmp = icmp eq i32 %urem, 0
  68   %ret = zext i1 %cmp to i32
  69   ret i32 %ret
  70 }
  71
  72 ; This is like test_urem_odd, except the divisor has bit 31 set.
  73 define i32 @test_urem_odd_bit31(i32 %X) nounwind readnone {
  74 ; X86-LABEL: test_urem_odd_bit31:
  75 ; X86:       # %bb.0:
  76 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
  77 ; X86-NEXT:    movl $1073741823, %edx # imm = 0x3FFFFFFF
  78 ; X86-NEXT:    movl %ecx, %eax
  79 ; X86-NEXT:    mull %edx
  80 ; X86-NEXT:    shrl $29, %edx
  81 ; X86-NEXT:    imull $-2147483645, %edx, %edx # imm = 0x80000003
  82 ; X86-NEXT:    xorl %eax, %eax
  83 ; X86-NEXT:    cmpl %edx, %ecx
  84 ; X86-NEXT:    sete %al
  85 ; X86-NEXT:    retl
  86 ;
  87 ; X64-LABEL: test_urem_odd_bit31:
  88 ; X64:       # %bb.0:
  89 ; X64-NEXT:    movl %edi, %eax
  90 ; X64-NEXT:    movq %rax, %rcx
  91 ; X64-NEXT:    shlq $30, %rcx
  92 ; X64-NEXT:    subq %rax, %rcx
  93 ; X64-NEXT:    shrq $61, %rcx
  94 ; X64-NEXT:    imull $-2147483645, %ecx, %ecx # imm = 0x80000003
  95 ; X64-NEXT:    xorl %eax, %eax
  96 ; X64-NEXT:    cmpl %ecx, %edi
  97 ; X64-NEXT:    sete %al
  98 ; X64-NEXT:    retq
  99   %urem = urem i32 %X, 2147483651
 100   %cmp = icmp eq i32 %urem, 0
 101   %ret = zext i1 %cmp to i32
 102   ret i32 %ret
 103 }
 104
 105 ; This tests the BuildREMEqFold optimization with UREM, i16, even divisor, SETNE.
 106 ; In this case, D <=> 14 <=> 7 * 2^1, so D0 = 7 and K = 1.
 107 ; The corresponding pseudocode is:
 108 ; Q <- [N * multInv(D0, 2^16)] <=> [N * multInv(7, 2^16)] <=> [N * 28087]
 109 ; Q <- [Q >>rot K] <=> [Q >>rot 1]
 110 ; res <- ![Q <= (2^16 - 1) / 7] <=> ![Q <= 9362] <=> [Q > 9362]
 111 define i16 @test_urem_even(i16 %X) nounwind readnone {
 112 ; X86-LABEL: test_urem_even:
 113 ; X86:       # %bb.0:
 114 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
 115 ; X86-NEXT:    movl %ecx, %eax
 116 ; X86-NEXT:    shrl %eax
 117 ; X86-NEXT:    imull $18725, %eax, %eax # imm = 0x4925
 118 ; X86-NEXT:    shrl $17, %eax
 119 ; X86-NEXT:    movl %eax, %edx
 120 ; X86-NEXT:    shll $4, %edx
 121 ; X86-NEXT:    subl %eax, %edx
 122 ; X86-NEXT:    subl %eax, %edx
 123 ; X86-NEXT:    xorl %eax, %eax
 124 ; X86-NEXT:    cmpw %dx, %cx
 125 ; X86-NEXT:    setne %al
 126 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 127 ; X86-NEXT:    retl
 128 ;
 129 ; X64-LABEL: test_urem_even:
 130 ; X64:       # %bb.0:
 131 ; X64-NEXT:    movzwl %di, %ecx
 132 ; X64-NEXT:    movl %ecx, %eax
 133 ; X64-NEXT:    shrl %eax
 134 ; X64-NEXT:    imull $18725, %eax, %eax # imm = 0x4925
 135 ; X64-NEXT:    shrl $17, %eax
 136 ; X64-NEXT:    movl %eax, %edx
 137 ; X64-NEXT:    shll $4, %edx
 138 ; X64-NEXT:    subl %eax, %edx
 139 ; X64-NEXT:    subl %eax, %edx
 140 ; X64-NEXT:    xorl %eax, %eax
 141 ; X64-NEXT:    cmpw %dx, %cx
 142 ; X64-NEXT:    setne %al
 143 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 144 ; X64-NEXT:    retq
 145   %urem = urem i16 %X, 14
 146   %cmp = icmp ne i16 %urem, 0
 147   %ret = zext i1 %cmp to i16
 148   ret i16 %ret
 149 }
 150
 151 ; This is like test_urem_even, except the divisor has bit 30 set.
 152 define i32 @test_urem_even_bit30(i32 %X) nounwind readnone {
 153 ; X86-LABEL: test_urem_even_bit30:
 154 ; X86:       # %bb.0:
 155 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 156 ; X86-NEXT:    movl $-415, %edx # imm = 0xFE61
 157 ; X86-NEXT:    movl %ecx, %eax
 158 ; X86-NEXT:    mull %edx
 159 ; X86-NEXT:    shrl $30, %edx
 160 ; X86-NEXT:    imull $1073741928, %edx, %edx # imm = 0x40000068
 161 ; X86-NEXT:    xorl %eax, %eax
 162 ; X86-NEXT:    cmpl %edx, %ecx
 163 ; X86-NEXT:    sete %al
 164 ; X86-NEXT:    retl
 165 ;
 166 ; X64-LABEL: test_urem_even_bit30:
 167 ; X64:       # %bb.0:
 168 ; X64-NEXT:    movl %edi, %eax
 169 ; X64-NEXT:    movl $4294966881, %ecx # imm = 0xFFFFFE61
 170 ; X64-NEXT:    imulq %rax, %rcx
 171 ; X64-NEXT:    shrq $62, %rcx
 172 ; X64-NEXT:    imull $1073741928, %ecx, %ecx # imm = 0x40000068
 173 ; X64-NEXT:    xorl %eax, %eax
 174 ; X64-NEXT:    cmpl %ecx, %edi
 175 ; X64-NEXT:    sete %al
 176 ; X64-NEXT:    retq
 177   %urem = urem i32 %X, 1073741928
 178   %cmp = icmp eq i32 %urem, 0
 179   %ret = zext i1 %cmp to i32
 180   ret i32 %ret
 181 }
 182
 183 ; This is like test_urem_even, except the divisor has bit 31 set.
 184 define i32 @test_urem_even_bit31(i32 %X) nounwind readnone {
 185 ; X86-LABEL: test_urem_even_bit31:
 186 ; X86:       # %bb.0:
 187 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 188 ; X86-NEXT:    movl $2147483547, %edx # imm = 0x7FFFFF9B
 189 ; X86-NEXT:    movl %ecx, %eax
 190 ; X86-NEXT:    mull %edx
 191 ; X86-NEXT:    shrl $30, %edx
 192 ; X86-NEXT:    imull $-2147483546, %edx, %edx # imm = 0x80000066
 193 ; X86-NEXT:    xorl %eax, %eax
 194 ; X86-NEXT:    cmpl %edx, %ecx
 195 ; X86-NEXT:    sete %al
 196 ; X86-NEXT:    retl
 197 ;
 198 ; X64-LABEL: test_urem_even_bit31:
 199 ; X64:       # %bb.0:
 200 ; X64-NEXT:    movl %edi, %eax
 201 ; X64-NEXT:    imulq $2147483547, %rax, %rax # imm = 0x7FFFFF9B
 202 ; X64-NEXT:    shrq $62, %rax
 203 ; X64-NEXT:    imull $-2147483546, %eax, %ecx # imm = 0x80000066
 204 ; X64-NEXT:    xorl %eax, %eax
 205 ; X64-NEXT:    cmpl %ecx, %edi
 206 ; X64-NEXT:    sete %al
 207 ; X64-NEXT:    retq
 208   %urem = urem i32 %X, 2147483750
 209   %cmp = icmp eq i32 %urem, 0
 210   %ret = zext i1 %cmp to i32
 211   ret i32 %ret
 212 }
 213
 214 ; We should not proceed with this fold if the divisor is 1 or -1. Only divisor 1 is exercised here: X urem 1 is always 0, so the result is the constant 1.
 215 define i32 @test_urem_one(i32 %X) nounwind readnone {
 216 ; CHECK-LABEL: test_urem_one:
 217 ; CHECK:       # %bb.0:
 218 ; CHECK-NEXT:    movl $1, %eax
 219 ; CHECK-NEXT:    ret{{[l|q]}}
 220   %urem = urem i32 %X, 1
 221   %cmp = icmp eq i32 %urem, 0
 222   %ret = zext i1 %cmp to i32
 223   ret i32 %ret
 224 }
 225
 226 ; We can lower remainder of division by powers of two much better elsewhere;
 227 ; also, BuildREMEqFold does not work when the only odd factor of the divisor is 1.
 228 ; This ensures we don't touch powers of two.
 229 define i32 @test_urem_pow2(i32 %X) nounwind readnone {
 230 ; X86-LABEL: test_urem_pow2:
 231 ; X86:       # %bb.0:
 232 ; X86-NEXT:    xorl %eax, %eax
 233 ; X86-NEXT:    testb $15, {{[0-9]+}}(%esp)
 234 ; X86-NEXT:    sete %al
 235 ; X86-NEXT:    retl
 236 ;
 237 ; X64-LABEL: test_urem_pow2:
 238 ; X64:       # %bb.0:
 239 ; X64-NEXT:    xorl %eax, %eax
 240 ; X64-NEXT:    testb $15, %dil
 241 ; X64-NEXT:    sete %al
 242 ; X64-NEXT:    retq
 243   %urem = urem i32 %X, 16
 244   %cmp = icmp eq i32 %urem, 0
 245   %ret = zext i1 %cmp to i32
 246   ret i32 %ret
 247 }