test/CodeGen/AArch64/funnel-shift.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
   3
   4 declare i8 @llvm.fshl.i8(i8, i8, i8)
   5 declare i16 @llvm.fshl.i16(i16, i16, i16)
   6 declare i32 @llvm.fshl.i32(i32, i32, i32)
   7 declare i64 @llvm.fshl.i64(i64, i64, i64)
   8 declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
   9
  10 declare i8 @llvm.fshr.i8(i8, i8, i8)
  11 declare i16 @llvm.fshr.i16(i16, i16, i16)
  12 declare i32 @llvm.fshr.i32(i32, i32, i32)
  13 declare i64 @llvm.fshr.i64(i64, i64, i64)
  14 declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
  15
  16 ; General case - all operands can be variables. When the low 5 bits of %z are zero, the csel returns %x unshifted.
  17
  18 define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
  19 ; CHECK-LABEL: fshl_i32:
  20 ; CHECK:       // %bb.0:
  21 ; CHECK-NEXT:    and w9, w2, #0x1f
  22 ; CHECK-NEXT:    neg w9, w9
  23 ; CHECK-NEXT:    lsl w8, w0, w2
  24 ; CHECK-NEXT:    lsr w9, w1, w9
  25 ; CHECK-NEXT:    orr w8, w8, w9
  26 ; CHECK-NEXT:    tst w2, #0x1f
  27 ; CHECK-NEXT:    csel w0, w0, w8, eq
  28 ; CHECK-NEXT:    ret
  29   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  30   ret i32 %f
  31 }
  32
  33 ; Verify that weird types are minimally supported. 37 is not a power of 2, so the expected code reduces the shift amount modulo 37 with a umulh/lsr/msub sequence.
  34 declare i37 @llvm.fshl.i37(i37, i37, i37)
  35 define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
  36 ; CHECK-LABEL: fshl_i37:
  37 ; CHECK:       // %bb.0:
  38 ; CHECK-NEXT:    mov x10, #31883
  39 ; CHECK-NEXT:    movk x10, #3542, lsl #16
  40 ; CHECK-NEXT:    movk x10, #51366, lsl #32
  41 ; CHECK-NEXT:    and x9, x2, #0x1fffffffff
  42 ; CHECK-NEXT:    movk x10, #56679, lsl #48
  43 ; CHECK-NEXT:    umulh x10, x9, x10
  44 ; CHECK-NEXT:    mov w11, #37
  45 ; CHECK-NEXT:    lsr x10, x10, #5
  46 ; CHECK-NEXT:    msub x9, x10, x11, x9
  47 ; CHECK-NEXT:    and x8, x1, #0x1fffffffff
  48 ; CHECK-NEXT:    sub x11, x11, x9
  49 ; CHECK-NEXT:    lsl x10, x0, x9
  50 ; CHECK-NEXT:    lsr x8, x8, x11
  51 ; CHECK-NEXT:    orr x8, x10, x8
  52 ; CHECK-NEXT:    cmp x9, #0 // =0
  53 ; CHECK-NEXT:    csel x0, x0, x8, eq
  54 ; CHECK-NEXT:    ret
  55   %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  56   ret i37 %f
  57 }
  58
  59 ; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011 (decimal 67); the all-constant call folds to a single mov.
  60
  61 declare i7 @llvm.fshl.i7(i7, i7, i7)
  62 define i7 @fshl_i7_const_fold() {
  63 ; CHECK-LABEL: fshl_i7_const_fold:
  64 ; CHECK:       // %bb.0:
  65 ; CHECK-NEXT:    mov w0, #67
  66 ; CHECK-NEXT:    ret
  67   %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  68   ret i7 %f
  69 }
  70
  71 define i8 @fshl_i8_const_fold_overshift_1() {
  72 ; CHECK-LABEL: fshl_i8_const_fold_overshift_1:
  73 ; CHECK:       // %bb.0:
  74 ; CHECK-NEXT:    mov w0, #128
  75 ; CHECK-NEXT:    ret
  76   %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15) ; 15 % 8 = 7: low 8 bits of (255 << 7) | (0 >> 1) = 128
  77   ret i8 %f
  78 }
  79
  80 define i8 @fshl_i8_const_fold_overshift_2() {
  81 ; CHECK-LABEL: fshl_i8_const_fold_overshift_2:
  82 ; CHECK:       // %bb.0:
  83 ; CHECK-NEXT:    mov w0, #120
  84 ; CHECK-NEXT:    ret
  85   %f = call i8 @llvm.fshl.i8(i8 15, i8 15, i8 11) ; 11 % 8 = 3: (15 << 3) | (15 >> 5) = 120
  86   ret i8 %f
  87 }
  88
  89 define i8 @fshl_i8_const_fold_overshift_3() {
  90 ; CHECK-LABEL: fshl_i8_const_fold_overshift_3:
  91 ; CHECK:       // %bb.0:
  92 ; CHECK-NEXT:    mov w0, wzr
  93 ; CHECK-NEXT:    ret
  94   %f = call i8 @llvm.fshl.i8(i8 0, i8 225, i8 8) ; 8 % 8 = 0: the result is the first operand, 0
  95   ret i8 %f
  96 }
  97
  98 ; With constant shift amount, this is 'extr'. A left funnel shift by 9 becomes the extr immediate 32-9=23.
  99
  100 define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
  101 ; CHECK-LABEL: fshl_i32_const_shift:
  102 ; CHECK:       // %bb.0:
  103 ; CHECK-NEXT:    extr w0, w0, w1, #23
  104 ; CHECK-NEXT:    ret
  105   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  106   ret i32 %f
  107 }
 108
  109 ; Check modulo math on shift amount. 41-32=9, so the extr immediate is 32-9=23.
  110
  111 define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
  112 ; CHECK-LABEL: fshl_i32_const_overshift:
  113 ; CHECK:       // %bb.0:
  114 ; CHECK-NEXT:    extr w0, w0, w1, #23
  115 ; CHECK-NEXT:    ret
  116   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  117   ret i32 %f
  118 }
 119
  120 ; 64-bit should also work. 105-64 = 41, so the extr immediate is 64-41 = 23.
  121
  122 define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
  123 ; CHECK-LABEL: fshl_i64_const_overshift:
  124 ; CHECK:       // %bb.0:
  125 ; CHECK-NEXT:    extr x0, x0, x1, #23
  126 ; CHECK-NEXT:    ret
  127   %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  128   ret i64 %f
  129 }
 130
  131 ; This should work without any node-specific logic. The low 8 bits of (255 << 7) | (0 >> 1) are 128.
  132
  133 define i8 @fshl_i8_const_fold() {
  134 ; CHECK-LABEL: fshl_i8_const_fold:
  135 ; CHECK:       // %bb.0:
  136 ; CHECK-NEXT:    mov w0, #128
  137 ; CHECK-NEXT:    ret
  138   %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  139   ret i8 %f
  140 }
 141
 142 ; Repeat everything for funnel shift right.
 143
  144 ; General case - all operands can be variables. When the low 5 bits of %z are zero, the csel returns %y unshifted.
  145
  146 define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
  147 ; CHECK-LABEL: fshr_i32:
  148 ; CHECK:       // %bb.0:
  149 ; CHECK-NEXT:    and w9, w2, #0x1f
  150 ; CHECK-NEXT:    neg w9, w9
  151 ; CHECK-NEXT:    lsr w8, w1, w2
  152 ; CHECK-NEXT:    lsl w9, w0, w9
  153 ; CHECK-NEXT:    orr w8, w9, w8
  154 ; CHECK-NEXT:    tst w2, #0x1f
  155 ; CHECK-NEXT:    csel w0, w1, w8, eq
  156 ; CHECK-NEXT:    ret
  157   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  158   ret i32 %f
  159 }
 160
  161 ; Verify that weird types are minimally supported. 37 is not a power of 2, so the expected code reduces the shift amount modulo 37 with a umulh/lsr/msub sequence.
  162 declare i37 @llvm.fshr.i37(i37, i37, i37)
  163 define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
  164 ; CHECK-LABEL: fshr_i37:
  165 ; CHECK:       // %bb.0:
  166 ; CHECK-NEXT:    mov x10, #31883
  167 ; CHECK-NEXT:    movk x10, #3542, lsl #16
  168 ; CHECK-NEXT:    movk x10, #51366, lsl #32
  169 ; CHECK-NEXT:    and x9, x2, #0x1fffffffff
  170 ; CHECK-NEXT:    movk x10, #56679, lsl #48
  171 ; CHECK-NEXT:    umulh x10, x9, x10
  172 ; CHECK-NEXT:    mov w11, #37
  173 ; CHECK-NEXT:    lsr x10, x10, #5
  174 ; CHECK-NEXT:    msub x9, x10, x11, x9
  175 ; CHECK-NEXT:    and x8, x1, #0x1fffffffff
  176 ; CHECK-NEXT:    sub x10, x11, x9
  177 ; CHECK-NEXT:    lsr x8, x8, x9
  178 ; CHECK-NEXT:    lsl x10, x0, x10
  179 ; CHECK-NEXT:    orr x8, x10, x8
  180 ; CHECK-NEXT:    cmp x9, #0 // =0
  181 ; CHECK-NEXT:    csel x0, x1, x8, eq
  182 ; CHECK-NEXT:    ret
  183   %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  184   ret i37 %f
  185 }
 186
  187 ; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111 (decimal 31); the all-constant call folds to a single mov.
  188
  189 declare i7 @llvm.fshr.i7(i7, i7, i7)
  190 define i7 @fshr_i7_const_fold() {
  191 ; CHECK-LABEL: fshr_i7_const_fold:
  192 ; CHECK:       // %bb.0:
  193 ; CHECK-NEXT:    mov w0, #31
  194 ; CHECK-NEXT:    ret
  195   %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  196   ret i7 %f
  197 }
 198
  199 define i8 @fshr_i8_const_fold_overshift_1() {
  200 ; CHECK-LABEL: fshr_i8_const_fold_overshift_1:
  201 ; CHECK:       // %bb.0:
  202 ; CHECK-NEXT:    mov w0, #254
  203 ; CHECK-NEXT:    ret
  204   %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15) ; 15 % 8 = 7: low 8 bits of (255 << 1) | (0 >> 7) = 254
  205   ret i8 %f
  206 }
 207
  208 define i8 @fshr_i8_const_fold_overshift_2() {
  209 ; CHECK-LABEL: fshr_i8_const_fold_overshift_2:
  210 ; CHECK:       // %bb.0:
  211 ; CHECK-NEXT:    mov w0, #225
  212 ; CHECK-NEXT:    ret
  213   %f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11) ; 11 % 8 = 3: low 8 bits of (15 << 5) | (15 >> 3) = 224 | 1 = 225
  214   ret i8 %f
  215 }
 216
  217 define i8 @fshr_i8_const_fold_overshift_3() {
  218 ; CHECK-LABEL: fshr_i8_const_fold_overshift_3:
  219 ; CHECK:       // %bb.0:
  220 ; CHECK-NEXT:    mov w0, #255
  221 ; CHECK-NEXT:    ret
  222   %f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8) ; 8 % 8 = 0: the result is the second operand, 255
  223   ret i8 %f
  224 }
 225
  226 ; With constant shift amount, this is 'extr'. A right funnel shift by 9 uses 9 directly as the extr immediate.
  227
  228 define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
  229 ; CHECK-LABEL: fshr_i32_const_shift:
  230 ; CHECK:       // %bb.0:
  231 ; CHECK-NEXT:    extr w0, w0, w1, #9
  232 ; CHECK-NEXT:    ret
  233   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  234   ret i32 %f
  235 }
 236
  237 ; Check modulo math on shift amount. 41-32=9, which is used directly as the extr immediate.
  238
  239 define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
  240 ; CHECK-LABEL: fshr_i32_const_overshift:
  241 ; CHECK:       // %bb.0:
  242 ; CHECK-NEXT:    extr w0, w0, w1, #9
  243 ; CHECK-NEXT:    ret
  244   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  245   ret i32 %f
  246 }
 247
  248 ; 64-bit should also work. 105-64 = 41, which is used directly as the extr immediate.
  249
  250 define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
  251 ; CHECK-LABEL: fshr_i64_const_overshift:
  252 ; CHECK:       // %bb.0:
  253 ; CHECK-NEXT:    extr x0, x0, x1, #41
  254 ; CHECK-NEXT:    ret
  255   %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  256   ret i64 %f
  257 }
 258
  259 ; This should work without any node-specific logic. The low 8 bits of (255 << 1) | (0 >> 7) are 254.
  260
  261 define i8 @fshr_i8_const_fold() {
  262 ; CHECK-LABEL: fshr_i8_const_fold:
  263 ; CHECK:       // %bb.0:
  264 ; CHECK-NEXT:    mov w0, #254
  265 ; CHECK-NEXT:    ret
  266   %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  267   ret i8 %f
  268 }
 269
  270 define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
  271 ; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
  272 ; CHECK:       // %bb.0:
  273 ; CHECK-NEXT:    ret
  274   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32) ; 32 % 32 = 0: the result is %x, so only a ret is expected
  275   ret i32 %f
  276 }
 277
  278 define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
  279 ; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
  280 ; CHECK:       // %bb.0:
  281 ; CHECK-NEXT:    mov w0, w1
  282 ; CHECK-NEXT:    ret
  283   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32) ; 32 % 32 = 0: the result is %y, moved into the return register
  284   ret i32 %f
  285 }
 286
  287 define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
  288 ; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
  289 ; CHECK:       // %bb.0:
  290 ; CHECK-NEXT:    ret
  291   %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>) ; every lane shifts by 32 % 32 = 0: the result is %x
  292   ret <4 x i32> %f
  293 }
 294
  295 define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
  296 ; CHECK-LABEL: fshr_v4i32_shift_by_bitwidth:
  297 ; CHECK:       // %bb.0:
  298 ; CHECK-NEXT:    mov v0.16b, v1.16b
  299 ; CHECK-NEXT:    ret
  300   %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>) ; every lane shifts by 32 % 32 = 0: the result is %y
  301   ret <4 x i32> %f
  302 }
 303