llvm/test/CodeGen/AArch64/funnel-shift.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
   3 ; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
   4
   5 declare i8 @llvm.fshl.i8(i8, i8, i8)
   6 declare i16 @llvm.fshl.i16(i16, i16, i16)
   7 declare i32 @llvm.fshl.i32(i32, i32, i32)
   8 declare i64 @llvm.fshl.i64(i64, i64, i64)
   9 declare i128 @llvm.fshl.i128(i128, i128, i128)
  10 declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
  11
  12 declare i8 @llvm.fshr.i8(i8, i8, i8)
  13 declare i16 @llvm.fshr.i16(i16, i16, i16)
  14 declare i32 @llvm.fshr.i32(i32, i32, i32)
  15 declare i64 @llvm.fshr.i64(i64, i64, i64)
  16 declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
  17
  18 ; General case - all operands can be variables.
  19
   20 define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
      ; i32 funnel shift left by a variable amount %z. Both selectors expand it
      ; to (%x << %z) | ((%y >> 1) >> inverted %z). The default selector (SD)
      ; inverts the amount with mvn; -global-isel (GI) masks the amounts
      ; explicitly with and/bic against 31.
   21 ; CHECK-SD-LABEL: fshl_i32:
   22 ; CHECK-SD:       // %bb.0:
   23 ; CHECK-SD-NEXT:    lsr w8, w1, #1
   24 ; CHECK-SD-NEXT:    // kill: def $w2 killed $w2 def $x2
   25 ; CHECK-SD-NEXT:    mvn w9, w2
   26 ; CHECK-SD-NEXT:    lsl w10, w0, w2
   27 ; CHECK-SD-NEXT:    lsr w8, w8, w9
   28 ; CHECK-SD-NEXT:    orr w0, w10, w8
   29 ; CHECK-SD-NEXT:    ret
   30 ;
   31 ; CHECK-GI-LABEL: fshl_i32:
   32 ; CHECK-GI:       // %bb.0:
   33 ; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
   34 ; CHECK-GI-NEXT:    lsr w9, w1, #1
   35 ; CHECK-GI-NEXT:    and w10, w2, #0x1f
   36 ; CHECK-GI-NEXT:    bic w8, w8, w2
   37 ; CHECK-GI-NEXT:    lsl w10, w0, w10
   38 ; CHECK-GI-NEXT:    lsr w8, w9, w8
   39 ; CHECK-GI-NEXT:    orr w0, w10, w8
   40 ; CHECK-GI-NEXT:    ret
   41   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
   42   ret i32 %f
   43 }
  44
   45 define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
      ; i64 funnel shift left by a variable amount: the same lsl / lsr-by-
      ; inverted-amount / orr shape as the i32 case, on x registers. GI masks
      ; the amounts against 63.
   46 ; CHECK-SD-LABEL: fshl_i64:
   47 ; CHECK-SD:       // %bb.0:
   48 ; CHECK-SD-NEXT:    lsr x8, x1, #1
   49 ; CHECK-SD-NEXT:    mvn w9, w2
   50 ; CHECK-SD-NEXT:    lsl x10, x0, x2
   51 ; CHECK-SD-NEXT:    lsr x8, x8, x9
   52 ; CHECK-SD-NEXT:    orr x0, x10, x8
   53 ; CHECK-SD-NEXT:    ret
   54 ;
   55 ; CHECK-GI-LABEL: fshl_i64:
   56 ; CHECK-GI:       // %bb.0:
   57 ; CHECK-GI-NEXT:    mov w8, #63 // =0x3f
   58 ; CHECK-GI-NEXT:    lsr x9, x1, #1
   59 ; CHECK-GI-NEXT:    and x10, x2, #0x3f
   60 ; CHECK-GI-NEXT:    bic x8, x8, x2
   61 ; CHECK-GI-NEXT:    lsl x10, x0, x10
   62 ; CHECK-GI-NEXT:    lsr x8, x9, x8
   63 ; CHECK-GI-NEXT:    orr x0, x10, x8
   64 ; CHECK-GI-NEXT:    ret
   65   %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
   66   ret i64 %f
   67 }
  68
   69 define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
      ; i128 funnel shift left by a variable amount; the result is produced in
      ; x0/x1. SD tests bit 6 of the amount (tst x4, #0x40) and uses csel to
      ; pick which 64-bit pieces feed two 64-bit shift/orr pairs. GI emits a
      ; longer sequence built from 64-bit shifts with cmp/tst + csel selects.
   70 ; CHECK-SD-LABEL: fshl_i128:
   71 ; CHECK-SD:       // %bb.0:
   72 ; CHECK-SD-NEXT:    tst x4, #0x40
   73 ; CHECK-SD-NEXT:    mvn w11, w4
   74 ; CHECK-SD-NEXT:    csel x8, x3, x0, ne
   75 ; CHECK-SD-NEXT:    csel x9, x2, x3, ne
   76 ; CHECK-SD-NEXT:    csel x12, x0, x1, ne
   77 ; CHECK-SD-NEXT:    lsr x9, x9, #1
   78 ; CHECK-SD-NEXT:    lsr x10, x8, #1
   79 ; CHECK-SD-NEXT:    lsl x8, x8, x4
   80 ; CHECK-SD-NEXT:    lsl x12, x12, x4
   81 ; CHECK-SD-NEXT:    lsr x9, x9, x11
   82 ; CHECK-SD-NEXT:    lsr x10, x10, x11
   83 ; CHECK-SD-NEXT:    orr x0, x8, x9
   84 ; CHECK-SD-NEXT:    orr x1, x12, x10
   85 ; CHECK-SD-NEXT:    ret
   86 ;
   87 ; CHECK-GI-LABEL: fshl_i128:
   88 ; CHECK-GI:       // %bb.0:
   89 ; CHECK-GI-NEXT:    and x9, x4, #0x7f
   90 ; CHECK-GI-NEXT:    mov w10, #64 // =0x40
   91 ; CHECK-GI-NEXT:    lsl x14, x3, #63
   92 ; CHECK-GI-NEXT:    sub x12, x10, x9
   93 ; CHECK-GI-NEXT:    lsl x13, x1, x9
   94 ; CHECK-GI-NEXT:    mov w8, #127 // =0x7f
   95 ; CHECK-GI-NEXT:    lsr x12, x0, x12
   96 ; CHECK-GI-NEXT:    bic x8, x8, x4
   97 ; CHECK-GI-NEXT:    sub x15, x9, #64
   98 ; CHECK-GI-NEXT:    cmp x9, #64
   99 ; CHECK-GI-NEXT:    lsl x9, x0, x9
  100 ; CHECK-GI-NEXT:    lsl x15, x0, x15
  101 ; CHECK-GI-NEXT:    orr x12, x12, x13
  102 ; CHECK-GI-NEXT:    orr x13, x14, x2, lsr #1
  103 ; CHECK-GI-NEXT:    lsr x14, x3, #1
  104 ; CHECK-GI-NEXT:    sub x10, x10, x8
  105 ; CHECK-GI-NEXT:    sub x16, x8, #64
  106 ; CHECK-GI-NEXT:    csel x9, x9, xzr, lo
  107 ; CHECK-GI-NEXT:    lsr x17, x13, x8
  108 ; CHECK-GI-NEXT:    lsl x10, x14, x10
  109 ; CHECK-GI-NEXT:    csel x12, x12, x15, lo
  110 ; CHECK-GI-NEXT:    tst x4, #0x7f
  111 ; CHECK-GI-NEXT:    lsr x15, x14, x16
  112 ; CHECK-GI-NEXT:    mvn x11, x4
  113 ; CHECK-GI-NEXT:    csel x12, x1, x12, eq
  114 ; CHECK-GI-NEXT:    orr x10, x17, x10
  115 ; CHECK-GI-NEXT:    cmp x8, #64
  116 ; CHECK-GI-NEXT:    lsr x14, x14, x8
  117 ; CHECK-GI-NEXT:    csel x10, x10, x15, lo
  118 ; CHECK-GI-NEXT:    tst x11, #0x7f
  119 ; CHECK-GI-NEXT:    csel x10, x13, x10, eq
  120 ; CHECK-GI-NEXT:    cmp x8, #64
  121 ; CHECK-GI-NEXT:    csel x8, x14, xzr, lo
  122 ; CHECK-GI-NEXT:    orr x0, x9, x10
  123 ; CHECK-GI-NEXT:    orr x1, x12, x8
  124 ; CHECK-GI-NEXT:    ret
  125   %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
  126   ret i128 %f
  127 }
 128
 129 ; Verify that weird types are minimally supported.
 130 declare i37 @llvm.fshl.i37(i37, i37, i37)
  131 define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
      ; i37 funnel shift left. 37 is not a power of two, so the amount is first
      ; reduced with an explicit remainder by 37: SD uses a multiply-high by a
      ; constant (umulh + msub), GI uses udiv + msub. Both mask %z to 37 bits
      ; (0x1fffffffff) before the reduction.
  132 ; CHECK-SD-LABEL: fshl_i37:
  133 ; CHECK-SD:       // %bb.0:
  134 ; CHECK-SD-NEXT:    mov x9, #46053 // =0xb3e5
  135 ; CHECK-SD-NEXT:    and x8, x2, #0x1fffffffff
  136 ; CHECK-SD-NEXT:    movk x9, #12398, lsl #16
  137 ; CHECK-SD-NEXT:    movk x9, #15941, lsl #32
  138 ; CHECK-SD-NEXT:    movk x9, #1771, lsl #48
  139 ; CHECK-SD-NEXT:    umulh x8, x8, x9
  140 ; CHECK-SD-NEXT:    mov w9, #37 // =0x25
  141 ; CHECK-SD-NEXT:    msub w8, w8, w9, w2
  142 ; CHECK-SD-NEXT:    ubfiz x9, x1, #26, #37
  143 ; CHECK-SD-NEXT:    mvn w10, w8
  144 ; CHECK-SD-NEXT:    lsl x8, x0, x8
  145 ; CHECK-SD-NEXT:    lsr x9, x9, x10
  146 ; CHECK-SD-NEXT:    orr x0, x8, x9
  147 ; CHECK-SD-NEXT:    ret
  148 ;
  149 ; CHECK-GI-LABEL: fshl_i37:
  150 ; CHECK-GI:       // %bb.0:
  151 ; CHECK-GI-NEXT:    mov w8, #37 // =0x25
  152 ; CHECK-GI-NEXT:    and x9, x2, #0x1fffffffff
  153 ; CHECK-GI-NEXT:    udiv x10, x9, x8
  154 ; CHECK-GI-NEXT:    msub x8, x10, x8, x9
  155 ; CHECK-GI-NEXT:    mov w9, #36 // =0x24
  156 ; CHECK-GI-NEXT:    ubfx x10, x1, #1, #36
  157 ; CHECK-GI-NEXT:    sub x9, x9, x8
  158 ; CHECK-GI-NEXT:    and x8, x8, #0x1fffffffff
  159 ; CHECK-GI-NEXT:    and x9, x9, #0x1fffffffff
  160 ; CHECK-GI-NEXT:    lsl x8, x0, x8
  161 ; CHECK-GI-NEXT:    lsr x9, x10, x9
  162 ; CHECK-GI-NEXT:    orr x0, x8, x9
  163 ; CHECK-GI-NEXT:    ret
  164   %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  165   ret i37 %f
  166 }
 167
 168 ; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011
 169
 170 declare i7 @llvm.fshl.i7(i7, i7, i7)
  171 define i7 @fshl_i7_const_fold() {
      ; All three operands are constants, so the intrinsic call is expected to
      ; fold to a single immediate move: 67 = 0b1000011.
  172 ; CHECK-LABEL: fshl_i7_const_fold:
  173 ; CHECK:       // %bb.0:
  174 ; CHECK-NEXT:    mov w0, #67 // =0x43
  175 ; CHECK-NEXT:    ret
  176   %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  177   ret i7 %f
  178 }
 179
  180 define i8 @fshl_i8_const_fold_overshift_1() {
      ; Constant fold with an amount (15) larger than the 8-bit width. The two
      ; selectors materialize the same low byte 0x80 differently: SD as 128,
      ; GI as the sign-extended -128.
  181 ; CHECK-SD-LABEL: fshl_i8_const_fold_overshift_1:
  182 ; CHECK-SD:       // %bb.0:
  183 ; CHECK-SD-NEXT:    mov w0, #128 // =0x80
  184 ; CHECK-SD-NEXT:    ret
  185 ;
  186 ; CHECK-GI-LABEL: fshl_i8_const_fold_overshift_1:
  187 ; CHECK-GI:       // %bb.0:
  188 ; CHECK-GI-NEXT:    mov w0, #-128 // =0xffffff80
  189 ; CHECK-GI-NEXT:    ret
  190   %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15)
  191   ret i8 %f
  192 }
 193
  194 define i8 @fshl_i8_const_fold_overshift_2() {
      ; Constant fold with amount 11 on an 8-bit type; both selectors are
      ; expected to produce 120 (0x78), i.e. 15 << 3.
  195 ; CHECK-LABEL: fshl_i8_const_fold_overshift_2:
  196 ; CHECK:       // %bb.0:
  197 ; CHECK-NEXT:    mov w0, #120 // =0x78
  198 ; CHECK-NEXT:    ret
  199   %f = call i8 @llvm.fshl.i8(i8 15, i8 15, i8 11)
  200   ret i8 %f
  201 }
 202
  203 define i8 @fshl_i8_const_fold_overshift_3() {
      ; Constant fold with the amount equal to the bit width (8). The expected
      ; result is zero, which is the value of the first operand.
  204 ; CHECK-LABEL: fshl_i8_const_fold_overshift_3:
  205 ; CHECK:       // %bb.0:
  206 ; CHECK-NEXT:    mov w0, wzr
  207 ; CHECK-NEXT:    ret
  208   %f = call i8 @llvm.fshl.i8(i8 0, i8 225, i8 8)
  209   ret i8 %f
  210 }
 211
 212 ; With constant shift amount, this is 'extr'.
 213
  214 define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
      ; Left funnel shift by the constant 9 selects a single extr whose
      ; immediate is the complementary right-shift amount, 32 - 9 = 23.
  215 ; CHECK-LABEL: fshl_i32_const_shift:
  216 ; CHECK:       // %bb.0:
  217 ; CHECK-NEXT:    extr w0, w0, w1, #23
  218 ; CHECK-NEXT:    ret
  219   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  220   ret i32 %f
  221 }
 222
  223 ; Check modulo math on shift amount. 41-32=9.
 224
  225 define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
      ; Amount 41 exceeds the 32-bit width; the expected extr is identical to
      ; the one for amount 9 (immediate 23).
  226 ; CHECK-LABEL: fshl_i32_const_overshift:
  227 ; CHECK:       // %bb.0:
  228 ; CHECK-NEXT:    extr w0, w0, w1, #23
  229 ; CHECK-NEXT:    ret
  230   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  231   ret i32 %f
  232 }
 233
  234 ; 64-bit should also work. 105-64 = 41.
 235
  236 define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
      ; Amount 105 exceeds the 64-bit width. The effective left shift is 41,
      ; so the extr immediate is 64 - 41 = 23.
  237 ; CHECK-LABEL: fshl_i64_const_overshift:
  238 ; CHECK:       // %bb.0:
  239 ; CHECK-NEXT:    extr x0, x0, x1, #23
  240 ; CHECK-NEXT:    ret
  241   %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  242   ret i64 %f
  243 }
 244
 245 ; This should work without any node-specific logic.
 246
  247 define i8 @fshl_i8_const_fold() {
      ; In-range constant fold (amount 7 on i8). The low byte is 0x80 for both
      ; selectors; SD prints it as 128, GI as the sign-extended -128.
  248 ; CHECK-SD-LABEL: fshl_i8_const_fold:
  249 ; CHECK-SD:       // %bb.0:
  250 ; CHECK-SD-NEXT:    mov w0, #128 // =0x80
  251 ; CHECK-SD-NEXT:    ret
  252 ;
  253 ; CHECK-GI-LABEL: fshl_i8_const_fold:
  254 ; CHECK-GI:       // %bb.0:
  255 ; CHECK-GI-NEXT:    mov w0, #-128 // =0xffffff80
  256 ; CHECK-GI-NEXT:    ret
  257   %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  258   ret i8 %f
  259 }
 260
 261 ; Repeat everything for funnel shift right.
 262
 263 ; General case - all operands can be variables.
 264
  265 define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
      ; i32 funnel shift right by a variable amount %z. Both selectors expand
      ; it to ((%x << 1) << inverted %z) | (%y >> %z). SD inverts the amount
      ; with mvn; GI masks the amounts with and/bic against 31.
  266 ; CHECK-SD-LABEL: fshr_i32:
  267 ; CHECK-SD:       // %bb.0:
  268 ; CHECK-SD-NEXT:    lsl w8, w0, #1
  269 ; CHECK-SD-NEXT:    // kill: def $w2 killed $w2 def $x2
  270 ; CHECK-SD-NEXT:    mvn w9, w2
  271 ; CHECK-SD-NEXT:    lsr w10, w1, w2
  272 ; CHECK-SD-NEXT:    lsl w8, w8, w9
  273 ; CHECK-SD-NEXT:    orr w0, w8, w10
  274 ; CHECK-SD-NEXT:    ret
  275 ;
  276 ; CHECK-GI-LABEL: fshr_i32:
  277 ; CHECK-GI:       // %bb.0:
  278 ; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
  279 ; CHECK-GI-NEXT:    lsl w9, w0, #1
  280 ; CHECK-GI-NEXT:    and w10, w2, #0x1f
  281 ; CHECK-GI-NEXT:    bic w8, w8, w2
  282 ; CHECK-GI-NEXT:    lsl w8, w9, w8
  283 ; CHECK-GI-NEXT:    lsr w9, w1, w10
  284 ; CHECK-GI-NEXT:    orr w0, w8, w9
  285 ; CHECK-GI-NEXT:    ret
  286   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  287   ret i32 %f
  288 }
 289
  290 define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) {
      ; i64 funnel shift right by a variable amount: the same lsl-by-inverted-
      ; amount / lsr / orr shape as the i32 case, on x registers. GI masks the
      ; amounts against 63.
  291 ; CHECK-SD-LABEL: fshr_i64:
  292 ; CHECK-SD:       // %bb.0:
  293 ; CHECK-SD-NEXT:    lsl x8, x0, #1
  294 ; CHECK-SD-NEXT:    mvn w9, w2
  295 ; CHECK-SD-NEXT:    lsr x10, x1, x2
  296 ; CHECK-SD-NEXT:    lsl x8, x8, x9
  297 ; CHECK-SD-NEXT:    orr x0, x8, x10
  298 ; CHECK-SD-NEXT:    ret
  299 ;
  300 ; CHECK-GI-LABEL: fshr_i64:
  301 ; CHECK-GI:       // %bb.0:
  302 ; CHECK-GI-NEXT:    mov w8, #63 // =0x3f
  303 ; CHECK-GI-NEXT:    lsl x9, x0, #1
  304 ; CHECK-GI-NEXT:    and x10, x2, #0x3f
  305 ; CHECK-GI-NEXT:    bic x8, x8, x2
  306 ; CHECK-GI-NEXT:    lsl x8, x9, x8
  307 ; CHECK-GI-NEXT:    lsr x9, x1, x10
  308 ; CHECK-GI-NEXT:    orr x0, x8, x9
  309 ; CHECK-GI-NEXT:    ret
  310   %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z)
  311   ret i64 %f
  312 }
 313
 314 ; Verify that weird types are minimally supported.
 315 declare i37 @llvm.fshr.i37(i37, i37, i37)
  316 define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
      ; i37 funnel shift right. The amount is reduced with an explicit
      ; remainder by 37: SD uses a multiply-high by a constant (umulh + msub),
      ; GI uses udiv + msub. SD additionally pre-shifts %y left by 27 and adds
      ; 27 to the reduced amount before the 64-bit shifts.
  317 ; CHECK-SD-LABEL: fshr_i37:
  318 ; CHECK-SD:       // %bb.0:
  319 ; CHECK-SD-NEXT:    mov x9, #46053 // =0xb3e5
  320 ; CHECK-SD-NEXT:    and x8, x2, #0x1fffffffff
  321 ; CHECK-SD-NEXT:    lsl x10, x0, #1
  322 ; CHECK-SD-NEXT:    movk x9, #12398, lsl #16
  323 ; CHECK-SD-NEXT:    movk x9, #15941, lsl #32
  324 ; CHECK-SD-NEXT:    movk x9, #1771, lsl #48
  325 ; CHECK-SD-NEXT:    umulh x8, x8, x9
  326 ; CHECK-SD-NEXT:    mov w9, #37 // =0x25
  327 ; CHECK-SD-NEXT:    msub w8, w8, w9, w2
  328 ; CHECK-SD-NEXT:    lsl x9, x1, #27
  329 ; CHECK-SD-NEXT:    add w8, w8, #27
  330 ; CHECK-SD-NEXT:    mvn w11, w8
  331 ; CHECK-SD-NEXT:    lsr x8, x9, x8
  332 ; CHECK-SD-NEXT:    lsl x9, x10, x11
  333 ; CHECK-SD-NEXT:    orr x0, x9, x8
  334 ; CHECK-SD-NEXT:    ret
  335 ;
  336 ; CHECK-GI-LABEL: fshr_i37:
  337 ; CHECK-GI:       // %bb.0:
  338 ; CHECK-GI-NEXT:    mov w8, #37 // =0x25
  339 ; CHECK-GI-NEXT:    and x9, x2, #0x1fffffffff
  340 ; CHECK-GI-NEXT:    and x11, x1, #0x1fffffffff
  341 ; CHECK-GI-NEXT:    udiv x10, x9, x8
  342 ; CHECK-GI-NEXT:    msub x8, x10, x8, x9
  343 ; CHECK-GI-NEXT:    mov w9, #36 // =0x24
  344 ; CHECK-GI-NEXT:    lsl x10, x0, #1
  345 ; CHECK-GI-NEXT:    sub x9, x9, x8
  346 ; CHECK-GI-NEXT:    and x8, x8, #0x1fffffffff
  347 ; CHECK-GI-NEXT:    and x9, x9, #0x1fffffffff
  348 ; CHECK-GI-NEXT:    lsr x8, x11, x8
  349 ; CHECK-GI-NEXT:    lsl x9, x10, x9
  350 ; CHECK-GI-NEXT:    orr x0, x9, x8
  351 ; CHECK-GI-NEXT:    ret
  352   %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  353   ret i37 %f
  354 }
 355
 356 ; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111
 357
 358 declare i7 @llvm.fshr.i7(i7, i7, i7)
  359 define i7 @fshr_i7_const_fold() {
      ; All three operands are constants, so the intrinsic call is expected to
      ; fold to a single immediate move: 31 = 0b0011111.
  360 ; CHECK-LABEL: fshr_i7_const_fold:
  361 ; CHECK:       // %bb.0:
  362 ; CHECK-NEXT:    mov w0, #31 // =0x1f
  363 ; CHECK-NEXT:    ret
  364   %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  365   ret i7 %f
  366 }
 367
  368 define i8 @fshr_i8_const_fold_overshift_1() {
      ; Constant fold with an amount (15) larger than the 8-bit width. The low
      ; byte is 0xfe for both selectors; SD prints it as 254, GI as the
      ; sign-extended -2.
  369 ; CHECK-SD-LABEL: fshr_i8_const_fold_overshift_1:
  370 ; CHECK-SD:       // %bb.0:
  371 ; CHECK-SD-NEXT:    mov w0, #254 // =0xfe
  372 ; CHECK-SD-NEXT:    ret
  373 ;
  374 ; CHECK-GI-LABEL: fshr_i8_const_fold_overshift_1:
  375 ; CHECK-GI:       // %bb.0:
  376 ; CHECK-GI-NEXT:    mov w0, #-2 // =0xfffffffe
  377 ; CHECK-GI-NEXT:    ret
  378   %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15)
  379   ret i8 %f
  380 }
 381
  382 define i8 @fshr_i8_const_fold_overshift_2() {
      ; Constant fold with amount 11 on an 8-bit type. The two selectors agree
      ; on the low byte 0xe1: SD materializes 225, while GI materializes 481
      ; (0x1e1), which additionally has bit 8 set above the i8 result.
  383 ; CHECK-SD-LABEL: fshr_i8_const_fold_overshift_2:
  384 ; CHECK-SD:       // %bb.0:
  385 ; CHECK-SD-NEXT:    mov w0, #225 // =0xe1
  386 ; CHECK-SD-NEXT:    ret
  387 ;
  388 ; CHECK-GI-LABEL: fshr_i8_const_fold_overshift_2:
  389 ; CHECK-GI:       // %bb.0:
  390 ; CHECK-GI-NEXT:    mov w0, #481 // =0x1e1
  391 ; CHECK-GI-NEXT:    ret
  392   %f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11)
  393   ret i8 %f
  394 }
 395
  396 define i8 @fshr_i8_const_fold_overshift_3() {
      ; Constant fold with the amount equal to the bit width (8). The expected
      ; result is 255, which is the value of the second operand.
  397 ; CHECK-LABEL: fshr_i8_const_fold_overshift_3:
  398 ; CHECK:       // %bb.0:
  399 ; CHECK-NEXT:    mov w0, #255 // =0xff
  400 ; CHECK-NEXT:    ret
  401   %f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8)
  402   ret i8 %f
  403 }
 404
 405 ; With constant shift amount, this is 'extr'.
 406
  407 define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
      ; Right funnel shift by the constant 9 selects a single extr that uses
      ; the amount directly as its immediate.
  408 ; CHECK-LABEL: fshr_i32_const_shift:
  409 ; CHECK:       // %bb.0:
  410 ; CHECK-NEXT:    extr w0, w0, w1, #9
  411 ; CHECK-NEXT:    ret
  412   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  413   ret i32 %f
  414 }
 415
 416 ; Check modulo math on shift amount. 41-32=9.
 417
  418 define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
      ; Amount 41 exceeds the 32-bit width; the expected extr is identical to
      ; the one for amount 9.
  419 ; CHECK-LABEL: fshr_i32_const_overshift:
  420 ; CHECK:       // %bb.0:
  421 ; CHECK-NEXT:    extr w0, w0, w1, #9
  422 ; CHECK-NEXT:    ret
  423   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  424   ret i32 %f
  425 }
 426
 427 ; 64-bit should also work. 105-64 = 41.
 428
  429 define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
      ; Amount 105 exceeds the 64-bit width; the expected extr immediate is
      ; the reduced amount, 41.
  430 ; CHECK-LABEL: fshr_i64_const_overshift:
  431 ; CHECK:       // %bb.0:
  432 ; CHECK-NEXT:    extr x0, x0, x1, #41
  433 ; CHECK-NEXT:    ret
  434   %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  435   ret i64 %f
  436 }
 437
 438 ; This should work without any node-specific logic.
 439
  440 define i8 @fshr_i8_const_fold() {
      ; In-range constant fold (amount 7 on i8). The low byte is 0xfe for both
      ; selectors; SD prints it as 254, GI as the sign-extended -2.
  441 ; CHECK-SD-LABEL: fshr_i8_const_fold:
  442 ; CHECK-SD:       // %bb.0:
  443 ; CHECK-SD-NEXT:    mov w0, #254 // =0xfe
  444 ; CHECK-SD-NEXT:    ret
  445 ;
  446 ; CHECK-GI-LABEL: fshr_i8_const_fold:
  447 ; CHECK-GI:       // %bb.0:
  448 ; CHECK-GI-NEXT:    mov w0, #-2 // =0xfffffffe
  449 ; CHECK-GI-NEXT:    ret
  450   %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  451   ret i8 %f
  452 }
 453
  454 define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
      ; Left funnel shift by exactly the bit width (32). The expected code is
      ; a bare ret, so w0 (%x) is returned unchanged.
  455 ; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
  456 ; CHECK:       // %bb.0:
  457 ; CHECK-NEXT:    ret
  458   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  459   ret i32 %f
  460 }
 461
  462 define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
      ; Right funnel shift by exactly the bit width (32). The expected code
      ; just copies w1 (%y) into the return register.
  463 ; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
  464 ; CHECK:       // %bb.0:
  465 ; CHECK-NEXT:    mov w0, w1
  466 ; CHECK-NEXT:    ret
  467   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  468   ret i32 %f
  469 }
 470
  471 define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
      ; Vector form of the shift-by-bit-width case: every lane shifts left by
      ; 32. The expected code is a bare ret, leaving v0 (%x) unchanged.
  472 ; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
  473 ; CHECK:       // %bb.0:
  474 ; CHECK-NEXT:    ret
  475   %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  476   ret <4 x i32> %f
  477 }
 478
  479 define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
      ; Vector form of the shift-by-bit-width case: every lane shifts right by
      ; 32. The expected code is a whole-register copy of v1 (%y) into v0.
  480 ; CHECK-LABEL: fshr_v4i32_shift_by_bitwidth:
  481 ; CHECK:       // %bb.0:
  482 ; CHECK-NEXT:    mov v0.16b, v1.16b
  483 ; CHECK-NEXT:    ret
  484   %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  485   ret <4 x i32> %f
  486 }
 487
  488 define i32 @or_shl_fshl(i32 %x, i32 %y, i32 %s) {
      ; or(fshl(%x, %y, %s), shl(%y, %s)): the shl operand is the funnel
      ; shift's second input. Both selectors keep the separate lsl of w1 next
      ; to the expanded funnel shift and combine the pieces with two orr.
  489 ; CHECK-SD-LABEL: or_shl_fshl:
  490 ; CHECK-SD:       // %bb.0:
  491 ; CHECK-SD-NEXT:    mov w8, w2
  492 ; CHECK-SD-NEXT:    lsr w9, w1, #1
  493 ; CHECK-SD-NEXT:    lsl w10, w1, w2
  494 ; CHECK-SD-NEXT:    mvn w11, w2
  495 ; CHECK-SD-NEXT:    lsl w8, w0, w8
  496 ; CHECK-SD-NEXT:    lsr w9, w9, w11
  497 ; CHECK-SD-NEXT:    orr w8, w8, w10
  498 ; CHECK-SD-NEXT:    orr w0, w8, w9
  499 ; CHECK-SD-NEXT:    ret
  500 ;
  501 ; CHECK-GI-LABEL: or_shl_fshl:
  502 ; CHECK-GI:       // %bb.0:
  503 ; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
  504 ; CHECK-GI-NEXT:    and w9, w2, #0x1f
  505 ; CHECK-GI-NEXT:    lsr w10, w1, #1
  506 ; CHECK-GI-NEXT:    lsl w11, w1, w2
  507 ; CHECK-GI-NEXT:    bic w8, w8, w2
  508 ; CHECK-GI-NEXT:    lsl w9, w0, w9
  509 ; CHECK-GI-NEXT:    lsr w8, w10, w8
  510 ; CHECK-GI-NEXT:    orr w9, w9, w11
  511 ; CHECK-GI-NEXT:    orr w0, w9, w8
  512 ; CHECK-GI-NEXT:    ret
  513   %shy = shl i32 %y, %s
  514   %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
  515   %or = or i32 %fun, %shy
  516   ret i32 %or
  517 }
 518
  519 define i32 @or_shl_rotl(i32 %x, i32 %y, i32 %s) {
      ; or(rotl(%y, %s), shl(%x, %s)), with the rotate written as a funnel
      ; shift of %y with itself. The rotate-left is expected as neg + ror; the
      ; shl stays a separate lsl.
  520 ; CHECK-LABEL: or_shl_rotl:
  521 ; CHECK:       // %bb.0:
  522 ; CHECK-NEXT:    neg w8, w2
  523 ; CHECK-NEXT:    lsl w9, w0, w2
  524 ; CHECK-NEXT:    ror w8, w1, w8
  525 ; CHECK-NEXT:    orr w0, w8, w9
  526 ; CHECK-NEXT:    ret
  527   %shx = shl i32 %x, %s
  528   %rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
  529   %or = or i32 %rot, %shx
  530   ret i32 %or
  531 }
 532
  533 define i32 @or_shl_fshl_commute(i32 %x, i32 %y, i32 %s) {
      ; Same computation as or_shl_fshl with the operands of the IR `or`
      ; swapped. The expected code differs only in the operand order of the
      ; first orr.
  534 ; CHECK-SD-LABEL: or_shl_fshl_commute:
  535 ; CHECK-SD:       // %bb.0:
  536 ; CHECK-SD-NEXT:    mov w8, w2
  537 ; CHECK-SD-NEXT:    lsr w9, w1, #1
  538 ; CHECK-SD-NEXT:    lsl w10, w1, w2
  539 ; CHECK-SD-NEXT:    mvn w11, w2
  540 ; CHECK-SD-NEXT:    lsl w8, w0, w8
  541 ; CHECK-SD-NEXT:    lsr w9, w9, w11
  542 ; CHECK-SD-NEXT:    orr w8, w10, w8
  543 ; CHECK-SD-NEXT:    orr w0, w8, w9
  544 ; CHECK-SD-NEXT:    ret
  545 ;
  546 ; CHECK-GI-LABEL: or_shl_fshl_commute:
  547 ; CHECK-GI:       // %bb.0:
  548 ; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
  549 ; CHECK-GI-NEXT:    and w9, w2, #0x1f
  550 ; CHECK-GI-NEXT:    lsr w10, w1, #1
  551 ; CHECK-GI-NEXT:    lsl w11, w1, w2
  552 ; CHECK-GI-NEXT:    bic w8, w8, w2
  553 ; CHECK-GI-NEXT:    lsl w9, w0, w9
  554 ; CHECK-GI-NEXT:    lsr w8, w10, w8
  555 ; CHECK-GI-NEXT:    orr w9, w11, w9
  556 ; CHECK-GI-NEXT:    orr w0, w9, w8
  557 ; CHECK-GI-NEXT:    ret
  558   %shy = shl i32 %y, %s
  559   %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
  560   %or = or i32 %shy, %fun
  561   ret i32 %or
  562 }
 563
  564 define i32 @or_shl_rotl_commute(i32 %x, i32 %y, i32 %s) {
      ; Same computation as or_shl_rotl with the operands of the IR `or`
      ; swapped. The expected code differs only in the operand order of the
      ; final orr.
  565 ; CHECK-LABEL: or_shl_rotl_commute:
  566 ; CHECK:       // %bb.0:
  567 ; CHECK-NEXT:    neg w8, w2
  568 ; CHECK-NEXT:    lsl w9, w0, w2
  569 ; CHECK-NEXT:    ror w8, w1, w8
  570 ; CHECK-NEXT:    orr w0, w9, w8
  571 ; CHECK-NEXT:    ret
  572   %shx = shl i32 %x, %s
  573   %rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
  574   %or = or i32 %shx, %rot
  575   ret i32 %or
  576 }
 577
  578 define i32 @or_lshr_fshr(i32 %x, i32 %y, i32 %s) {
      ; or(fshr(%y, %x, %s), lshr(%y, %s)): the lshr operand is the funnel
      ; shift's first input. Both selectors keep the separate lsr of w1 next
      ; to the expanded funnel shift and combine the pieces with two orr.
  579 ; CHECK-SD-LABEL: or_lshr_fshr:
  580 ; CHECK-SD:       // %bb.0:
  581 ; CHECK-SD-NEXT:    mov w8, w2
  582 ; CHECK-SD-NEXT:    lsl w9, w1, #1
  583 ; CHECK-SD-NEXT:    lsr w10, w1, w2
  584 ; CHECK-SD-NEXT:    lsr w8, w0, w8
  585 ; CHECK-SD-NEXT:    mvn w11, w2
  586 ; CHECK-SD-NEXT:    lsl w9, w9, w11
  587 ; CHECK-SD-NEXT:    orr w8, w8, w10
  588 ; CHECK-SD-NEXT:    orr w0, w9, w8
  589 ; CHECK-SD-NEXT:    ret
  590 ;
  591 ; CHECK-GI-LABEL: or_lshr_fshr:
  592 ; CHECK-GI:       // %bb.0:
  593 ; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
  594 ; CHECK-GI-NEXT:    and w9, w2, #0x1f
  595 ; CHECK-GI-NEXT:    lsl w10, w1, #1
  596 ; CHECK-GI-NEXT:    lsr w11, w1, w2
  597 ; CHECK-GI-NEXT:    bic w8, w8, w2
  598 ; CHECK-GI-NEXT:    lsr w9, w0, w9
  599 ; CHECK-GI-NEXT:    lsl w8, w10, w8
  600 ; CHECK-GI-NEXT:    orr w9, w9, w11
  601 ; CHECK-GI-NEXT:    orr w0, w8, w9
  602 ; CHECK-GI-NEXT:    ret
  603   %shy = lshr i32 %y, %s
  604   %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
  605   %or = or i32 %fun, %shy
  606   ret i32 %or
  607 }
 608
  609 define i32 @or_lshr_rotr(i32 %x, i32 %y, i32 %s) {
      ; or(rotr(%y, %s), lshr(%x, %s)), with the rotate written as a funnel
      ; shift of %y with itself. The rotate-right is expected as a single ror
      ; using %s directly; the lshr stays a separate lsr.
  610 ; CHECK-LABEL: or_lshr_rotr:
  611 ; CHECK:       // %bb.0:
  612 ; CHECK-NEXT:    lsr w8, w0, w2
  613 ; CHECK-NEXT:    ror w9, w1, w2
  614 ; CHECK-NEXT:    orr w0, w9, w8
  615 ; CHECK-NEXT:    ret
  616   %shx = lshr i32 %x, %s
  617   %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
  618   %or = or i32 %rot, %shx
  619   ret i32 %or
  620 }
 621
  622 define i32 @or_lshr_fshr_commute(i32 %x, i32 %y, i32 %s) {
      ; Same computation as or_lshr_fshr with the operands of the IR `or`
      ; swapped. The expected code differs only in the operand order of the
      ; two orr instructions.
  623 ; CHECK-SD-LABEL: or_lshr_fshr_commute:
  624 ; CHECK-SD:       // %bb.0:
  625 ; CHECK-SD-NEXT:    mov w8, w2
  626 ; CHECK-SD-NEXT:    lsl w9, w1, #1
  627 ; CHECK-SD-NEXT:    lsr w10, w1, w2
  628 ; CHECK-SD-NEXT:    lsr w8, w0, w8
  629 ; CHECK-SD-NEXT:    mvn w11, w2
  630 ; CHECK-SD-NEXT:    lsl w9, w9, w11
  631 ; CHECK-SD-NEXT:    orr w8, w10, w8
  632 ; CHECK-SD-NEXT:    orr w0, w8, w9
  633 ; CHECK-SD-NEXT:    ret
  634 ;
  635 ; CHECK-GI-LABEL: or_lshr_fshr_commute:
  636 ; CHECK-GI:       // %bb.0:
  637 ; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
  638 ; CHECK-GI-NEXT:    and w9, w2, #0x1f
  639 ; CHECK-GI-NEXT:    lsl w10, w1, #1
  640 ; CHECK-GI-NEXT:    lsr w11, w1, w2
  641 ; CHECK-GI-NEXT:    bic w8, w8, w2
  642 ; CHECK-GI-NEXT:    lsr w9, w0, w9
  643 ; CHECK-GI-NEXT:    lsl w8, w10, w8
  644 ; CHECK-GI-NEXT:    orr w9, w11, w9
  645 ; CHECK-GI-NEXT:    orr w0, w9, w8
  646 ; CHECK-GI-NEXT:    ret
  647   %shy = lshr i32 %y, %s
  648   %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
  649   %or = or i32 %shy, %fun
  650   ret i32 %or
  651 }
 652
  653 define i32 @or_lshr_rotr_commute(i32 %x, i32 %y, i32 %s) {
      ; Same computation as or_lshr_rotr with the operands of the IR `or`
      ; swapped. The expected code differs only in the operand order of the
      ; final orr.
  654 ; CHECK-LABEL: or_lshr_rotr_commute:
  655 ; CHECK:       // %bb.0:
  656 ; CHECK-NEXT:    lsr w8, w0, w2
  657 ; CHECK-NEXT:    ror w9, w1, w2
  658 ; CHECK-NEXT:    orr w0, w8, w9
  659 ; CHECK-NEXT:    ret
  660   %shx = lshr i32 %x, %s
  661   %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
  662   %or = or i32 %shx, %rot
  663   ret i32 %or
  664 }
 665
  666 define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
      ; or(fshl(%y, %x, %s), shl(%y, %s)): here the shl operand is the funnel
      ; shift's first input. SD's expected output contains a single lsl of w1,
      ; the same shape as a plain funnel shift, so the extra shl is absorbed.
      ; GI still emits both lsl of w1 and two orr.
  667 ; CHECK-SD-LABEL: or_shl_fshl_simplify:
  668 ; CHECK-SD:       // %bb.0:
  669 ; CHECK-SD-NEXT:    lsr w8, w0, #1
  670 ; CHECK-SD-NEXT:    // kill: def $w2 killed $w2 def $x2
  671 ; CHECK-SD-NEXT:    mvn w9, w2
  672 ; CHECK-SD-NEXT:    lsl w10, w1, w2
  673 ; CHECK-SD-NEXT:    lsr w8, w8, w9
  674 ; CHECK-SD-NEXT:    orr w0, w10, w8
  675 ; CHECK-SD-NEXT:    ret
  676 ;
  677 ; CHECK-GI-LABEL: or_shl_fshl_simplify:
  678 ; CHECK-GI:       // %bb.0:
  679 ; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
  680 ; CHECK-GI-NEXT:    and w9, w2, #0x1f
  681 ; CHECK-GI-NEXT:    lsr w10, w0, #1
  682 ; CHECK-GI-NEXT:    lsl w11, w1, w2
  683 ; CHECK-GI-NEXT:    bic w8, w8, w2
  684 ; CHECK-GI-NEXT:    lsl w9, w1, w9
  685 ; CHECK-GI-NEXT:    lsr w8, w10, w8
  686 ; CHECK-GI-NEXT:    orr w9, w9, w11
  687 ; CHECK-GI-NEXT:    orr w0, w9, w8
  688 ; CHECK-GI-NEXT:    ret
  689   %shy = shl i32 %y, %s
  690   %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
  691   %or = or i32 %fun, %shy
  692   ret i32 %or
  693 }
 694
  695 define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) {
      ; or(lshr(%y, %s), fshr(%x, %y, %s)): here the lshr operand is the
      ; funnel shift's second input. SD's expected output contains a single
      ; lsr of w1, the same shape as a plain funnel shift, so the extra lshr
      ; is absorbed. GI still emits both lsr of w1 and two orr.
  696 ; CHECK-SD-LABEL: or_lshr_fshr_simplify:
  697 ; CHECK-SD:       // %bb.0:
  698 ; CHECK-SD-NEXT:    lsl w8, w0, #1
  699 ; CHECK-SD-NEXT:    // kill: def $w2 killed $w2 def $x2
  700 ; CHECK-SD-NEXT:    mvn w9, w2
  701 ; CHECK-SD-NEXT:    lsr w10, w1, w2
  702 ; CHECK-SD-NEXT:    lsl w8, w8, w9
  703 ; CHECK-SD-NEXT:    orr w0, w8, w10
  704 ; CHECK-SD-NEXT:    ret
  705 ;
  706 ; CHECK-GI-LABEL: or_lshr_fshr_simplify:
  707 ; CHECK-GI:       // %bb.0:
  708 ; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
  709 ; CHECK-GI-NEXT:    and w9, w2, #0x1f
  710 ; CHECK-GI-NEXT:    lsl w10, w0, #1
  711 ; CHECK-GI-NEXT:    lsr w11, w1, w2
  712 ; CHECK-GI-NEXT:    bic w8, w8, w2
  713 ; CHECK-GI-NEXT:    lsr w9, w1, w9
  714 ; CHECK-GI-NEXT:    lsl w8, w10, w8
  715 ; CHECK-GI-NEXT:    orr w9, w11, w9
  716 ; CHECK-GI-NEXT:    orr w0, w9, w8
  717 ; CHECK-GI-NEXT:    ret
  718   %shy = lshr i32 %y, %s
  719   %fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s)
  720   %or = or i32 %shy, %fun
  721   ret i32 %or
  722 }