llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
   3
   4 ; Odd divisor: per-lane (X urem 25) == 0 with a splat divisor, widened to i32.
   5 define <4 x i32> @test_urem_odd_25(<4 x i32> %X) nounwind {
   6 ; CHECK-LABEL: test_urem_odd_25:
   7 ; CHECK:       // %bb.0:
   8 ; CHECK-NEXT:    mov w8, #23593 // =0x5c29
   9 ; CHECK-NEXT:    movi v2.4s, #1
  10 ; CHECK-NEXT:    movk w8, #49807, lsl #16
  11 ; CHECK-NEXT:    dup v1.4s, w8
  12 ; CHECK-NEXT:    mov w8, #28835 // =0x70a3
  13 ; CHECK-NEXT:    movk w8, #2621, lsl #16
  14 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
  15 ; CHECK-NEXT:    dup v1.4s, w8
  16 ; CHECK-NEXT:    cmhs v0.4s, v1.4s, v0.4s
  17 ; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
  18 ; CHECK-NEXT:    ret
  19   %urem = urem <4 x i32> %X, <i32 25, i32 25, i32 25, i32 25> ; expected assembly: mul by a constant + cmhs, no explicit remainder
  20   %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> ; per-lane test: remainder == 0
  21   %ret = zext <4 x i1> %cmp to <4 x i32> ; each lane becomes 1 if the test held, else 0
  22   ret <4 x i32> %ret
  23 }
  24
  25 ; Even divisor: per-lane (X urem 100) == 0 with a splat divisor, widened to i32.
  26 define <4 x i32> @test_urem_even_100(<4 x i32> %X) nounwind {
  27 ; CHECK-LABEL: test_urem_even_100:
  28 ; CHECK:       // %bb.0:
  29 ; CHECK-NEXT:    mov w8, #23593 // =0x5c29
  30 ; CHECK-NEXT:    movi v2.4s, #1
  31 ; CHECK-NEXT:    movk w8, #49807, lsl #16
  32 ; CHECK-NEXT:    dup v1.4s, w8
  33 ; CHECK-NEXT:    mov w8, #23592 // =0x5c28
  34 ; CHECK-NEXT:    movk w8, #655, lsl #16
  35 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
  36 ; CHECK-NEXT:    shl v1.4s, v0.4s, #30
  37 ; CHECK-NEXT:    usra v1.4s, v0.4s, #2
  38 ; CHECK-NEXT:    dup v0.4s, w8
  39 ; CHECK-NEXT:    cmhs v0.4s, v0.4s, v1.4s
  40 ; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
  41 ; CHECK-NEXT:    ret
  42   %urem = urem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100> ; expected assembly: mul, then shl #30 / usra #2, then cmhs
  43   %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> ; per-lane test: remainder == 0
  44   %ret = zext <4 x i1> %cmp to <4 x i32> ; each lane becomes 1 if the test held, else 0
  45   ret <4 x i32> %ret
  46 }
  47
  48 ; Negative divisors should be negated, and thus these are still splat vectors.
  49
  50 ; Odd divisor: the divisor lanes mix 25 and -25; per-lane remainder == 0, widened to i32.
  51 define <4 x i32> @test_urem_odd_neg25(<4 x i32> %X) nounwind {
  52 ; CHECK-LABEL: test_urem_odd_neg25:
  53 ; CHECK:       // %bb.0:
  54 ; CHECK-NEXT:    adrp x8, .LCPI2_0
  55 ; CHECK-NEXT:    movi v2.4s, #1
  56 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI2_0]
  57 ; CHECK-NEXT:    adrp x8, .LCPI2_1
  58 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
  59 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI2_1]
  60 ; CHECK-NEXT:    cmhs v0.4s, v1.4s, v0.4s
  61 ; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
  62 ; CHECK-NEXT:    ret
  63   %urem = urem <4 x i32> %X, <i32 25, i32 -25, i32 -25, i32 25> ; expected assembly loads the mul and cmhs operands from .LCPI2_0 / .LCPI2_1
  64   %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> ; per-lane test: remainder == 0
  65   %ret = zext <4 x i1> %cmp to <4 x i32> ; each lane becomes 1 if the test held, else 0
  66   ret <4 x i32> %ret
  67 }
  68
  69 ; Even divisor: the divisor lanes mix -100 and 100; per-lane remainder == 0, widened to i32.
  70 define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind {
  71 ; CHECK-LABEL: test_urem_even_neg100:
  72 ; CHECK:       // %bb.0:
  73 ; CHECK-NEXT:    adrp x8, .LCPI3_0
  74 ; CHECK-NEXT:    movi v2.4s, #1
  75 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI3_0]
  76 ; CHECK-NEXT:    adrp x8, .LCPI3_1
  77 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
  78 ; CHECK-NEXT:    shl v1.4s, v0.4s, #30
  79 ; CHECK-NEXT:    usra v1.4s, v0.4s, #2
  80 ; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI3_1]
  81 ; CHECK-NEXT:    cmhs v0.4s, v0.4s, v1.4s
  82 ; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
  83 ; CHECK-NEXT:    ret
  84   %urem = urem <4 x i32> %X, <i32 -100, i32 100, i32 -100, i32 100> ; expected assembly loads the mul and cmhs operands from .LCPI3_0 / .LCPI3_1
  85   %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> ; per-lane test: remainder == 0
  86   %ret = zext <4 x i1> %cmp to <4 x i32> ; each lane becomes 1 if the test held, else 0
  87   ret <4 x i32> %ret
  88 }
  89
  90 ;------------------------------------------------------------------------------;
  91 ; Comparison constant has undef elements.
  92 ;------------------------------------------------------------------------------;
  93
  94 define <4 x i32> @test_urem_odd_undef1(<4 x i32> %X) nounwind { ; splat divisor 25; one lane of the compared-against constant is undef
  95 ; CHECK-LABEL: test_urem_odd_undef1:
  96 ; CHECK:       // %bb.0:
  97 ; CHECK-NEXT:    mov w8, #34079 // =0x851f
  98 ; CHECK-NEXT:    movk w8, #20971, lsl #16
  99 ; CHECK-NEXT:    dup v1.4s, w8
 100 ; CHECK-NEXT:    umull2 v2.2d, v0.4s, v1.4s
 101 ; CHECK-NEXT:    umull v1.2d, v0.2s, v1.2s
 102 ; CHECK-NEXT:    uzp2 v1.4s, v1.4s, v2.4s
 103 ; CHECK-NEXT:    movi v2.4s, #25
 104 ; CHECK-NEXT:    ushr v1.4s, v1.4s, #3
 105 ; CHECK-NEXT:    mls v0.4s, v1.4s, v2.4s
 106 ; CHECK-NEXT:    movi v1.4s, #1
 107 ; CHECK-NEXT:    cmeq v0.4s, v0.4s, #0
 108 ; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
 109 ; CHECK-NEXT:    ret
 110   %urem = urem <4 x i32> %X, <i32 25, i32 25, i32 25, i32 25> ; expected assembly computes the remainder explicitly (umull/umull2, uzp2, ushr, mls)
 111   %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 undef, i32 0> ; lane 2 of the constant is undef; expected assembly uses cmeq #0
 112   %ret = zext <4 x i1> %cmp to <4 x i32> ; each lane becomes 1 if the test held, else 0
 113   ret <4 x i32> %ret
 114 }
 115
 116 define <4 x i32> @test_urem_even_undef1(<4 x i32> %X) nounwind { ; splat divisor 100; one lane of the compared-against constant is undef
 117 ; CHECK-LABEL: test_urem_even_undef1:
 118 ; CHECK:       // %bb.0:
 119 ; CHECK-NEXT:    mov w8, #34079 // =0x851f
 120 ; CHECK-NEXT:    movk w8, #20971, lsl #16
 121 ; CHECK-NEXT:    dup v1.4s, w8
 122 ; CHECK-NEXT:    umull2 v2.2d, v0.4s, v1.4s
 123 ; CHECK-NEXT:    umull v1.2d, v0.2s, v1.2s
 124 ; CHECK-NEXT:    uzp2 v1.4s, v1.4s, v2.4s
 125 ; CHECK-NEXT:    movi v2.4s, #100
 126 ; CHECK-NEXT:    ushr v1.4s, v1.4s, #5
 127 ; CHECK-NEXT:    mls v0.4s, v1.4s, v2.4s
 128 ; CHECK-NEXT:    movi v1.4s, #1
 129 ; CHECK-NEXT:    cmeq v0.4s, v0.4s, #0
 130 ; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
 131 ; CHECK-NEXT:    ret
 132   %urem = urem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100> ; expected assembly computes the remainder explicitly (umull/umull2, uzp2, ushr, mls)
 133   %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 undef, i32 0> ; lane 2 of the constant is undef; expected assembly uses cmeq #0
 134   %ret = zext <4 x i1> %cmp to <4 x i32> ; each lane becomes 1 if the test held, else 0
 135   ret <4 x i32> %ret
 136 }
 137
 138 ;------------------------------------------------------------------------------;
 139 ; Negative tests
 140 ;------------------------------------------------------------------------------;
 141
 142 define <4 x i32> @test_urem_one_eq(<4 x i32> %X) nounwind { ; (X urem 1) == 0: expected assembly is just a constant vector of 1s
 143 ; CHECK-LABEL: test_urem_one_eq:
 144 ; CHECK:       // %bb.0:
 145 ; CHECK-NEXT:    movi v0.4s, #1
 146 ; CHECK-NEXT:    ret
 147   %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1> ; unsigned remainder by splat 1
 148   %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> ; per-lane test: remainder == 0
 149   %ret = zext <4 x i1> %cmp to <4 x i32> ; each lane becomes 1 if the test held, else 0
 150   ret <4 x i32> %ret
 151 }
 152 define <4 x i32> @test_urem_one_ne(<4 x i32> %X) nounwind { ; (X urem 1) != 0: expected assembly is just an all-zero vector
 153 ; CHECK-LABEL: test_urem_one_ne:
 154 ; CHECK:       // %bb.0:
 155 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 156 ; CHECK-NEXT:    ret
 157   %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1> ; unsigned remainder by splat 1
 158   %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> ; per-lane test: remainder != 0 (note ne, not eq)
 159   %ret = zext <4 x i1> %cmp to <4 x i32> ; each lane becomes 1 if the test held, else 0
 160   ret <4 x i32> %ret
 161 }
 162
 163 ; We can lower remainder of division by powers of two much better elsewhere (here: splat divisor 16).
 164 define <4 x i32> @test_urem_pow2(<4 x i32> %X) nounwind {
 165 ; CHECK-LABEL: test_urem_pow2:
 166 ; CHECK:       // %bb.0:
 167 ; CHECK-NEXT:    movi v1.4s, #15
 168 ; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
 169 ; CHECK-NEXT:    movi v1.4s, #1
 170 ; CHECK-NEXT:    cmeq v0.4s, v0.4s, #0
 171 ; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
 172 ; CHECK-NEXT:    ret
 173   %urem = urem <4 x i32> %X, <i32 16, i32 16, i32 16, i32 16> ; expected assembly masks each lane with 15 instead
 174   %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> ; per-lane test: remainder == 0 (cmeq #0 in the expected assembly)
 175   %ret = zext <4 x i1> %cmp to <4 x i32> ; each lane becomes 1 if the test held, else 0
 176   ret <4 x i32> %ret
 177 }
 178
 179 ; We could lower remainder of division by INT_MIN much better elsewhere (here: splat divisor 2147483648).
 180 define <4 x i32> @test_urem_int_min(<4 x i32> %X) nounwind {
 181 ; CHECK-LABEL: test_urem_int_min:
 182 ; CHECK:       // %bb.0:
 183 ; CHECK-NEXT:    bic v0.4s, #128, lsl #24
 184 ; CHECK-NEXT:    movi v1.4s, #1
 185 ; CHECK-NEXT:    cmeq v0.4s, v0.4s, #0
 186 ; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
 187 ; CHECK-NEXT:    ret
 188   %urem = urem <4 x i32> %X, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648> ; expected assembly clears bit 31 of each lane (bic #128, lsl #24) instead
 189   %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> ; per-lane test: remainder == 0 (cmeq #0 in the expected assembly)
 190   %ret = zext <4 x i1> %cmp to <4 x i32> ; each lane becomes 1 if the test held, else 0
 191   ret <4 x i32> %ret
 192 }
 193
 194 ; We could lower remainder of division by all-ones much better elsewhere (here: splat divisor 4294967295).
 195 define <4 x i32> @test_urem_allones(<4 x i32> %X) nounwind {
 196 ; CHECK-LABEL: test_urem_allones:
 197 ; CHECK:       // %bb.0:
 198 ; CHECK-NEXT:    movi v1.4s, #1
 199 ; CHECK-NEXT:    neg v0.4s, v0.4s
 200 ; CHECK-NEXT:    cmhs v0.4s, v1.4s, v0.4s
 201 ; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
 202 ; CHECK-NEXT:    ret
 203   %urem = urem <4 x i32> %X, <i32 4294967295, i32 4294967295, i32 4294967295, i32 4294967295> ; expected assembly negates X and does an unsigned compare against 1
 204   %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> ; per-lane test: remainder == 0
 205   %ret = zext <4 x i1> %cmp to <4 x i32> ; each lane becomes 1 if the test held, else 0
 206   ret <4 x i32> %ret
 207 }