llvm/test/CodeGen/AArch64/funnel-shift.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
   3
   4 declare i8 @llvm.fshl.i8(i8, i8, i8) ; funnel-shift-left intrinsics; call sites below pass (x, y, shift amount)
   5 declare i16 @llvm.fshl.i16(i16, i16, i16)
   6 declare i32 @llvm.fshl.i32(i32, i32, i32)
   7 declare i64 @llvm.fshl.i64(i64, i64, i64)
   8 declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) ; vector form, one shift amount per lane
   9
  10 declare i8 @llvm.fshr.i8(i8, i8, i8) ; funnel-shift-right intrinsics, same operand order as fshl
  11 declare i16 @llvm.fshr.i16(i16, i16, i16)
  12 declare i32 @llvm.fshr.i32(i32, i32, i32)
  13 declare i64 @llvm.fshr.i64(i64, i64, i64)
  14 declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) ; vector form, one shift amount per lane
  15
  16 ; General case - all operands can be variables.
  17
  18 define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) { ; 32-bit funnel shift left with a runtime shift amount %z
  19 ; CHECK-LABEL: fshl_i32:
  20 ; CHECK:       // %bb.0:
  21 ; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
  22 ; CHECK-NEXT:    mvn w9, w2
  23 ; CHECK-NEXT:    lsr w10, w1, #1
  24 ; CHECK-NEXT:    lsl w8, w0, w2
  25 ; CHECK-NEXT:    lsr w9, w10, w9
  26 ; CHECK-NEXT:    orr w0, w8, w9
  27 ; CHECK-NEXT:    ret
  28   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z) ; expected asm above computes (x << z) | ((y >> 1) >> ~z)
  29   ret i32 %f
  30 }
  31
  32 define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) { ; 64-bit funnel shift left with a runtime shift amount %z
  33 ; CHECK-LABEL: fshl_i64:
  34 ; CHECK:       // %bb.0:
  35 ; CHECK-NEXT:    mvn w9, w2
  36 ; CHECK-NEXT:    lsr x10, x1, #1
  37 ; CHECK-NEXT:    lsl x8, x0, x2
  38 ; CHECK-NEXT:    lsr x9, x10, x9
  39 ; CHECK-NEXT:    orr x0, x8, x9
  40 ; CHECK-NEXT:    ret
  41   %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z) ; expected asm above computes (x << z) | ((y >> 1) >> ~z) on x registers
  42   ret i64 %f
  43 }
  44
  45 ; Verify that weird types are minimally supported.
  46 declare i37 @llvm.fshl.i37(i37, i37, i37) ; declared next to its only visible user instead of with the declarations at the top
  47 define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { ; funnel shift left on a 37-bit integer with a runtime shift amount
  48 ; CHECK-LABEL: fshl_i37:
  49 ; CHECK:       // %bb.0:
  50 ; CHECK-NEXT:    mov x8, #31883
  51 ; CHECK-NEXT:    movk x8, #3542, lsl #16
  52 ; CHECK-NEXT:    movk x8, #51366, lsl #32
  53 ; CHECK-NEXT:    movk x8, #56679, lsl #48
  54 ; CHECK-NEXT:    umulh x8, x2, x8
  55 ; CHECK-NEXT:    mov w9, #37
  56 ; CHECK-NEXT:    ubfx x8, x8, #5, #27
  57 ; CHECK-NEXT:    msub w8, w8, w9, w2
  58 ; CHECK-NEXT:    lsl x9, x0, x8
  59 ; CHECK-NEXT:    mvn w8, w8
  60 ; CHECK-NEXT:    ubfiz x10, x1, #26, #37
  61 ; CHECK-NEXT:    lsr x8, x10, x8
  62 ; CHECK-NEXT:    orr x0, x9, x8
  63 ; CHECK-NEXT:    ret
  64   %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z) ; NOTE(review): the umulh/ubfx/msub sequence above appears to reduce z modulo 37 (msub subtracts a multiple of 37 from z) - confirm
  65   ret i37 %f
  66 }
  67
  68 ; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011
  69
  70 declare i7 @llvm.fshl.i7(i7, i7, i7) ; declared next to its only visible user
  71 define i7 @fshl_i7_const_fold() {
  72 ; CHECK-LABEL: fshl_i7_const_fold:
  73 ; CHECK:       // %bb.0:
  74 ; CHECK-NEXT:    mov w0, #67
  75 ; CHECK-NEXT:    ret
  76   %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2) ; all operands constant: 112 = 0b1110000, 127 = 0b1111111, expected result 67 = 0b1000011
  77   ret i7 %f
  78 }
  79
  80 define i8 @fshl_i8_const_fold_overshift_1() {
  81 ; CHECK-LABEL: fshl_i8_const_fold_overshift_1:
  82 ; CHECK:       // %bb.0:
  83 ; CHECK-NEXT:    mov w0, #128
  84 ; CHECK-NEXT:    ret
  85   %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15) ; shift amount exceeds the width: 15 mod 8 = 7, so (255 << 7) | (0 >> 1) = 128 in 8 bits
  86   ret i8 %f
  87 }
  88
  89 define i8 @fshl_i8_const_fold_overshift_2() {
  90 ; CHECK-LABEL: fshl_i8_const_fold_overshift_2:
  91 ; CHECK:       // %bb.0:
  92 ; CHECK-NEXT:    mov w0, #120
  93 ; CHECK-NEXT:    ret
  94   %f = call i8 @llvm.fshl.i8(i8 15, i8 15, i8 11) ; 11 mod 8 = 3, so (15 << 3) | (15 >> 5) = 120 | 0 = 120
  95   ret i8 %f
  96 }
  97
  98 define i8 @fshl_i8_const_fold_overshift_3() {
  99 ; CHECK-LABEL: fshl_i8_const_fold_overshift_3:
 100 ; CHECK:       // %bb.0:
 101 ; CHECK-NEXT:    mov w0, wzr
 102 ; CHECK-NEXT:    ret
 103   %f = call i8 @llvm.fshl.i8(i8 0, i8 225, i8 8) ; 8 mod 8 = 0, so the result is the first operand (0) and the second operand is ignored
 104   ret i8 %f
 105 }
 106
 107 ; With constant shift amount, this is 'extr'.
 108
 109 define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
 110 ; CHECK-LABEL: fshl_i32_const_shift:
 111 ; CHECK:       // %bb.0:
 112 ; CHECK-NEXT:    extr w0, w0, w1, #23
 113 ; CHECK-NEXT:    ret
 114   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9) ; left shift by 9 is expected as extr with immediate 32 - 9 = 23
 115   ret i32 %f
 116 }
 117
 118 ; Check modulo math on shift amount. 41-32=9 (extr uses 32-9=23).
 119
 120 define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
 121 ; CHECK-LABEL: fshl_i32_const_overshift:
 122 ; CHECK:       // %bb.0:
 123 ; CHECK-NEXT:    extr w0, w0, w1, #23
 124 ; CHECK-NEXT:    ret
 125   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41) ; 41 mod 32 = 9, so the expected extr matches a plain left shift by 9 (immediate 23)
 126   ret i32 %f
 127 }
 128
 129 ; 64-bit should also work. 105-64 = 41 (extr uses 64-41 = 23).
 130
 131 define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
 132 ; CHECK-LABEL: fshl_i64_const_overshift:
 133 ; CHECK:       // %bb.0:
 134 ; CHECK-NEXT:    extr x0, x0, x1, #23
 135 ; CHECK-NEXT:    ret
 136   %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105) ; 105 mod 64 = 41, and a left shift by 41 is expected as extr with immediate 64 - 41 = 23
 137   ret i64 %f
 138 }
 139
 140 ; This should work without any node-specific logic.
 141
 142 define i8 @fshl_i8_const_fold() {
 143 ; CHECK-LABEL: fshl_i8_const_fold:
 144 ; CHECK:       // %bb.0:
 145 ; CHECK-NEXT:    mov w0, #128
 146 ; CHECK-NEXT:    ret
 147   %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7) ; in-range constant shift: (255 << 7) | (0 >> 1) = 128 in 8 bits
 148   ret i8 %f
 149 }
 150
 151 ; Repeat everything for funnel shift right.
 152
 153 ; General case - all operands can be variables.
 154
 155 define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) { ; 32-bit funnel shift right with a runtime shift amount %z
 156 ; CHECK-LABEL: fshr_i32:
 157 ; CHECK:       // %bb.0:
 158 ; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 159 ; CHECK-NEXT:    mvn w9, w2
 160 ; CHECK-NEXT:    lsl w10, w0, #1
 161 ; CHECK-NEXT:    lsr w8, w1, w2
 162 ; CHECK-NEXT:    lsl w9, w10, w9
 163 ; CHECK-NEXT:    orr w0, w9, w8
 164 ; CHECK-NEXT:    ret
 165   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z) ; expected asm above computes ((x << 1) << ~z) | (y >> z)
 166   ret i32 %f
 167 }
 168
 169 define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) { ; 64-bit funnel shift right with a runtime shift amount %z
 170 ; CHECK-LABEL: fshr_i64:
 171 ; CHECK:       // %bb.0:
 172 ; CHECK-NEXT:    mvn w9, w2
 173 ; CHECK-NEXT:    lsl x10, x0, #1
 174 ; CHECK-NEXT:    lsr x8, x1, x2
 175 ; CHECK-NEXT:    lsl x9, x10, x9
 176 ; CHECK-NEXT:    orr x0, x9, x8
 177 ; CHECK-NEXT:    ret
 178   %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z) ; expected asm above computes ((x << 1) << ~z) | (y >> z) on x registers
 179   ret i64 %f
 180 }
 181
 182 ; Verify that weird types are minimally supported.
 183 declare i37 @llvm.fshr.i37(i37, i37, i37) ; declared next to its only visible user instead of with the declarations at the top
 184 define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { ; funnel shift right on a 37-bit integer with a runtime shift amount
 185 ; CHECK-LABEL: fshr_i37:
 186 ; CHECK:       // %bb.0:
 187 ; CHECK-NEXT:    mov x8, #31883
 188 ; CHECK-NEXT:    movk x8, #3542, lsl #16
 189 ; CHECK-NEXT:    movk x8, #51366, lsl #32
 190 ; CHECK-NEXT:    movk x8, #56679, lsl #48
 191 ; CHECK-NEXT:    umulh x8, x2, x8
 192 ; CHECK-NEXT:    mov w9, #37
 193 ; CHECK-NEXT:    lsr x8, x8, #5
 194 ; CHECK-NEXT:    msub w8, w8, w9, w2
 195 ; CHECK-NEXT:    lsl x10, x1, #27
 196 ; CHECK-NEXT:    add w8, w8, #27
 197 ; CHECK-NEXT:    lsr x9, x10, x8
 198 ; CHECK-NEXT:    mvn w8, w8
 199 ; CHECK-NEXT:    lsl x10, x0, #1
 200 ; CHECK-NEXT:    lsl x8, x10, x8
 201 ; CHECK-NEXT:    orr x0, x8, x9
 202 ; CHECK-NEXT:    ret
 203   %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z) ; NOTE(review): the umulh/lsr/msub sequence above appears to reduce z modulo 37, then 27 (= 64 - 37) is added to it and y is pre-shifted left by 27 - confirm
 204   ret i37 %f
 205 }
 206
 207 ; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111
 208
 209 declare i7 @llvm.fshr.i7(i7, i7, i7) ; declared next to its only visible user
 210 define i7 @fshr_i7_const_fold() {
 211 ; CHECK-LABEL: fshr_i7_const_fold:
 212 ; CHECK:       // %bb.0:
 213 ; CHECK-NEXT:    mov w0, #31
 214 ; CHECK-NEXT:    ret
 215   %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2) ; all operands constant: 112 = 0b1110000, 127 = 0b1111111, expected result 31 = 0b0011111
 216   ret i7 %f
 217 }
 218
 219 define i8 @fshr_i8_const_fold_overshift_1() {
 220 ; CHECK-LABEL: fshr_i8_const_fold_overshift_1:
 221 ; CHECK:       // %bb.0:
 222 ; CHECK-NEXT:    mov w0, #254
 223 ; CHECK-NEXT:    ret
 224   %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15) ; shift amount exceeds the width: 15 mod 8 = 7, so (255 << 1) | (0 >> 7) = 254 in 8 bits
 225   ret i8 %f
 226 }
 227
 228 define i8 @fshr_i8_const_fold_overshift_2() {
 229 ; CHECK-LABEL: fshr_i8_const_fold_overshift_2:
 230 ; CHECK:       // %bb.0:
 231 ; CHECK-NEXT:    mov w0, #225
 232 ; CHECK-NEXT:    ret
 233   %f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11) ; 11 mod 8 = 3, so (15 << 5) | (15 >> 3) = 224 | 1 = 225
 234   ret i8 %f
 235 }
 236
 237 define i8 @fshr_i8_const_fold_overshift_3() {
 238 ; CHECK-LABEL: fshr_i8_const_fold_overshift_3:
 239 ; CHECK:       // %bb.0:
 240 ; CHECK-NEXT:    mov w0, #255
 241 ; CHECK-NEXT:    ret
 242   %f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8) ; 8 mod 8 = 0, so the result is the second operand (255) and the first operand is ignored
 243   ret i8 %f
 244 }
 245
 246 ; With constant shift amount, this is 'extr'.
 247
 248 define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
 249 ; CHECK-LABEL: fshr_i32_const_shift:
 250 ; CHECK:       // %bb.0:
 251 ; CHECK-NEXT:    extr w0, w0, w1, #9
 252 ; CHECK-NEXT:    ret
 253   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9) ; right shift by 9 is expected as extr with the same immediate, 9
 254   ret i32 %f
 255 }
 256
 257 ; Check modulo math on shift amount. 41-32=9.
 258
 259 define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
 260 ; CHECK-LABEL: fshr_i32_const_overshift:
 261 ; CHECK:       // %bb.0:
 262 ; CHECK-NEXT:    extr w0, w0, w1, #9
 263 ; CHECK-NEXT:    ret
 264   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41) ; 41 mod 32 = 9, so the expected extr matches a plain right shift by 9
 265   ret i32 %f
 266 }
 267
 268 ; 64-bit should also work. 105-64 = 41.
 269
 270 define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
 271 ; CHECK-LABEL: fshr_i64_const_overshift:
 272 ; CHECK:       // %bb.0:
 273 ; CHECK-NEXT:    extr x0, x0, x1, #41
 274 ; CHECK-NEXT:    ret
 275   %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105) ; 105 mod 64 = 41, used directly as the extr immediate
 276   ret i64 %f
 277 }
 278
 279 ; This should work without any node-specific logic.
 280
 281 define i8 @fshr_i8_const_fold() {
 282 ; CHECK-LABEL: fshr_i8_const_fold:
 283 ; CHECK:       // %bb.0:
 284 ; CHECK-NEXT:    mov w0, #254
 285 ; CHECK-NEXT:    ret
 286   %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7) ; in-range constant shift: (255 << 1) | (0 >> 7) = 254 in 8 bits
 287   ret i8 %f
 288 }
 289
 290 define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) { ; shift amount equal to the bit width (32)
 291 ; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
 292 ; CHECK:       // %bb.0:
 293 ; CHECK-NEXT:    ret
 294   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32) ; 32 mod 32 = 0, so the result is %x; the expected asm is a bare ret with w0 untouched
 295   ret i32 %f
 296 }
 297
 298 define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) { ; shift amount equal to the bit width (32)
 299 ; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
 300 ; CHECK:       // %bb.0:
 301 ; CHECK-NEXT:    mov w0, w1
 302 ; CHECK-NEXT:    ret
 303   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32) ; 32 mod 32 = 0, so the result is %y; the expected asm copies w1 into w0
 304   ret i32 %f
 305 }
 306
 307 define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) { ; vector form: every lane shifts by the element width (32)
 308 ; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
 309 ; CHECK:       // %bb.0:
 310 ; CHECK-NEXT:    ret
 311   %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>) ; 32 mod 32 = 0 per lane, so the result is %x; the expected asm is a bare ret
 312   ret <4 x i32> %f
 313 }
 314
 315 define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) { ; vector form: every lane shifts by the element width (32)
 316 ; CHECK-LABEL: fshr_v4i32_shift_by_bitwidth:
 317 ; CHECK:       // %bb.0:
 318 ; CHECK-NEXT:    mov v0.16b, v1.16b
 319 ; CHECK-NEXT:    ret
 320   %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>) ; 32 mod 32 = 0 per lane, so the result is %y; the expected asm copies v1 into v0
 321   ret <4 x i32> %f
 322 }
 323