llvm/test/Transforms/InstCombine/rotate.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s
   3 ; RUN: opt < %s -passes=instcombine -use-constant-int-for-fixed-length-splat -S | FileCheck %s
   4
   5 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
   6
   7 ; Canonicalize rotate by constant to funnel shift intrinsics.
   8 ; This should help cost modeling for vectorization, inlining, etc.
   9 ; If a target does not have a rotate instruction, the expansion will
  10 ; be exactly these same 3 basic ops (shl/lshr/or).
  11
  12 define i32 @rotl_i32_constant(i32 %x) {
  13 ; CHECK-LABEL: @rotl_i32_constant(
  14 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 11)
  15 ; CHECK-NEXT:    ret i32 [[R]]
  16 ;
     ; shl by 11 and lshr by 21 of the same value (11 + 21 == 32): expect fshl(x, x, 11).
  17   %shl = shl i32 %x, 11
  18   %shr = lshr i32 %x, 21
  19   %r = or i32 %shr, %shl
  20   ret i32 %r
  21 }
  22
  23 define i42 @rotr_i42_constant(i42 %x) {
  24 ; CHECK-LABEL: @rotr_i42_constant(
  25 ; CHECK-NEXT:    [[R:%.*]] = call i42 @llvm.fshl.i42(i42 [[X:%.*]], i42 [[X]], i42 31)
  26 ; CHECK-NEXT:    ret i42 [[R]]
  27 ;
     ; shl 31 / lshr 11 on i42 (31 + 11 == 42): the result is expressed as fshl with the shl amount, 31.
  28   %shl = shl i42 %x, 31
  29   %shr = lshr i42 %x, 11
  30   %r = or i42 %shr, %shl
  31   ret i42 %r
  32 }
  33
  34 define i8 @rotr_i8_constant_commute(i8 %x) {
  35 ; CHECK-LABEL: @rotr_i8_constant_commute(
  36 ; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[X]], i8 5)
  37 ; CHECK-NEXT:    ret i8 [[R]]
  38 ;
     ; The 'or' takes the shl as its first operand here; 5 + 3 == 8, so expect fshl by 5.
  39   %shl = shl i8 %x, 5
  40   %shr = lshr i8 %x, 3
  41   %r = or i8 %shl, %shr
  42   ret i8 %r
  43 }
  44
  45 define i88 @rotl_i88_constant_commute(i88 %x) {
  46 ; CHECK-LABEL: @rotl_i88_constant_commute(
  47 ; CHECK-NEXT:    [[R:%.*]] = call i88 @llvm.fshl.i88(i88 [[X:%.*]], i88 [[X]], i88 44)
  48 ; CHECK-NEXT:    ret i88 [[R]]
  49 ;
     ; Both shift amounts are 44 (half of the 88-bit width); expect fshl by 44.
  50   %shl = shl i88 %x, 44
  51   %shr = lshr i88 %x, 44
  52   %r = or i88 %shl, %shr
  53   ret i88 %r
  54 }
  55
  56 ; Vector types are allowed.
  57
  58 define <2 x i16> @rotl_v2i16_constant_splat(<2 x i16> %x) {
  59 ; CHECK-LABEL: @rotl_v2i16_constant_splat(
  60 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> splat (i16 1))
  61 ; CHECK-NEXT:    ret <2 x i16> [[R]]
  62 ;
     ; Splat shift amounts 1 and 15 (1 + 15 == 16): expect a vector fshl with splat amount 1.
  63   %shl = shl <2 x i16> %x, <i16 1, i16 1>
  64   %shr = lshr <2 x i16> %x, <i16 15, i16 15>
  65   %r = or <2 x i16> %shl, %shr
  66   ret <2 x i16> %r
  67 }
  68
  69 define <2 x i16> @rotl_v2i16_constant_splat_poison0(<2 x i16> %x) {
  70 ; CHECK-LABEL: @rotl_v2i16_constant_splat_poison0(
  71 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> splat (i16 1))
  72 ; CHECK-NEXT:    ret <2 x i16> [[R]]
  73 ;
     ; Lane 0 of the shl amount is poison; the expected fshl amount is still the full splat (i16 1).
  74   %shl = shl <2 x i16> %x, <i16 poison, i16 1>
  75   %shr = lshr <2 x i16> %x, <i16 15, i16 15>
  76   %r = or <2 x i16> %shl, %shr
  77   ret <2 x i16> %r
  78 }
  79
  80 define <2 x i16> @rotl_v2i16_constant_splat_poison1(<2 x i16> %x) {
  81 ; CHECK-LABEL: @rotl_v2i16_constant_splat_poison1(
  82 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> splat (i16 1))
  83 ; CHECK-NEXT:    ret <2 x i16> [[R]]
  84 ;
     ; Lane 1 of the lshr amount is poison; the expected fshl amount is still splat (i16 1).
  85   %shl = shl <2 x i16> %x, <i16 1, i16 1>
  86   %shr = lshr <2 x i16> %x, <i16 15, i16 poison>
  87   %r = or <2 x i16> %shl, %shr
  88   ret <2 x i16> %r
  89 }
  90
  91 ; Non-power-of-2 vector types are allowed.
  92
  93 define <2 x i17> @rotr_v2i17_constant_splat(<2 x i17> %x) {
  94 ; CHECK-LABEL: @rotr_v2i17_constant_splat(
  95 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> splat (i17 12))
  96 ; CHECK-NEXT:    ret <2 x i17> [[R]]
  97 ;
     ; i17 elements: 12 + 5 == 17; expect fshl with splat amount 12 (the shl amount).
  98   %shl = shl <2 x i17> %x, <i17 12, i17 12>
  99   %shr = lshr <2 x i17> %x, <i17 5, i17 5>
 100   %r = or <2 x i17> %shr, %shl
 101   ret <2 x i17> %r
 102 }
 103
 104 define <2 x i17> @rotr_v2i17_constant_splat_poison0(<2 x i17> %x) {
 105 ; CHECK-LABEL: @rotr_v2i17_constant_splat_poison0(
 106 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> splat (i17 12))
 107 ; CHECK-NEXT:    ret <2 x i17> [[R]]
 108 ;
     ; The poison lanes are in different positions (shl lane 1, lshr lane 0); expect splat (i17 12).
 109   %shl = shl <2 x i17> %x, <i17 12, i17 poison>
 110   %shr = lshr <2 x i17> %x, <i17 poison, i17 5>
 111   %r = or <2 x i17> %shr, %shl
 112   ret <2 x i17> %r
 113 }
 114
 115 define <2 x i17> @rotr_v2i17_constant_splat_poison1(<2 x i17> %x) {
 116 ; CHECK-LABEL: @rotr_v2i17_constant_splat_poison1(
 117 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> splat (i17 12))
 118 ; CHECK-NEXT:    ret <2 x i17> [[R]]
 119 ;
     ; Lane 1 is poison in both amount vectors; the expected fshl amount is still splat (i17 12).
 120   %shl = shl <2 x i17> %x, <i17 12, i17 poison>
 121   %shr = lshr <2 x i17> %x, <i17 5, i17 poison>
 122   %r = or <2 x i17> %shr, %shl
 123   ret <2 x i17> %r
 124 }
 125
 126 ; Allow arbitrary shift constants.
 127 ; Support poison elements.
 128
 129 define <2 x i32> @rotr_v2i32_constant_nonsplat(<2 x i32> %x) {
 130 ; CHECK-LABEL: @rotr_v2i32_constant_nonsplat(
 131 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 17, i32 19>)
 132 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 133 ;
     ; Per-lane amounts differ, but each lane sums to 32 (17 + 15, 19 + 13); expect fshl by <17, 19>.
 134   %shl = shl <2 x i32> %x, <i32 17, i32 19>
 135   %shr = lshr <2 x i32> %x, <i32 15, i32 13>
 136   %r = or <2 x i32> %shl, %shr
 137   ret <2 x i32> %r
 138 }
 139
 140 define <2 x i32> @rotr_v2i32_constant_nonsplat_poison0(<2 x i32> %x) {
 141 ; CHECK-LABEL: @rotr_v2i32_constant_nonsplat_poison0(
 142 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 poison, i32 19>)
 143 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 144 ;
     ; Poison in lane 0 of the shl amount is carried through to lane 0 of the expected fshl amount.
 145   %shl = shl <2 x i32> %x, <i32 poison, i32 19>
 146   %shr = lshr <2 x i32> %x, <i32 15, i32 13>
 147   %r = or <2 x i32> %shl, %shr
 148   ret <2 x i32> %r
 149 }
 150
 151 define <2 x i32> @rotr_v2i32_constant_nonsplat_poison1(<2 x i32> %x) {
 152 ; CHECK-LABEL: @rotr_v2i32_constant_nonsplat_poison1(
 153 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 17, i32 0>)
 154 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 155 ;
     ; Lane 1 of the lshr amount is poison; the CHECK line expects fshl amount 0 (not 19) in that lane.
 156   %shl = shl <2 x i32> %x, <i32 17, i32 19>
 157   %shr = lshr <2 x i32> %x, <i32 15, i32 poison>
 158   %r = or <2 x i32> %shl, %shr
 159   ret <2 x i32> %r
 160 }
 161
 162 define <2 x i36> @rotl_v2i36_constant_nonsplat(<2 x i36> %x) {
 163 ; CHECK-LABEL: @rotl_v2i36_constant_nonsplat(
 164 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i36> @llvm.fshl.v2i36(<2 x i36> [[X:%.*]], <2 x i36> [[X]], <2 x i36> <i36 21, i36 11>)
 165 ; CHECK-NEXT:    ret <2 x i36> [[R]]
 166 ;
     ; i36 elements: 21 + 15 == 36 and 11 + 25 == 36; expect fshl by the shl amounts <21, 11>.
 167   %shl = shl <2 x i36> %x, <i36 21, i36 11>
 168   %shr = lshr <2 x i36> %x, <i36 15, i36 25>
 169   %r = or <2 x i36> %shl, %shr
 170   ret <2 x i36> %r
 171 }
 172
 173 define <3 x i36> @rotl_v3i36_constant_nonsplat_poison0(<3 x i36> %x) {
 174 ; CHECK-LABEL: @rotl_v3i36_constant_nonsplat_poison0(
 175 ; CHECK-NEXT:    [[R:%.*]] = call <3 x i36> @llvm.fshl.v3i36(<3 x i36> [[X:%.*]], <3 x i36> [[X]], <3 x i36> <i36 21, i36 11, i36 poison>)
 176 ; CHECK-NEXT:    ret <3 x i36> [[R]]
 177 ;
     ; Lane 2 is poison in both amount vectors; the expected fshl amount keeps poison in that lane.
 178   %shl = shl <3 x i36> %x, <i36 21, i36 11, i36 poison>
 179   %shr = lshr <3 x i36> %x, <i36 15, i36 25, i36 poison>
 180   %r = or <3 x i36> %shl, %shr
 181   ret <3 x i36> %r
 182 }
 183
 184 ; The most basic rotate by variable - no guards for UB due to oversized shifts.
 185 ; This cannot be canonicalized to funnel shift target-independently. The safe
 186 ; expansion includes masking for the shift amount that is not included here,
 187 ; so it could be more expensive.
 188
 189 define i32 @rotl_i32(i32 %x, i32 %y) {
 190 ; CHECK-LABEL: @rotl_i32(
 191 ; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[Y:%.*]]
 192 ; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y]]
 193 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X]], [[SUB]]
 194 ; CHECK-NEXT:    [[R:%.*]] = or i32 [[SHR]], [[SHL]]
 195 ; CHECK-NEXT:    ret i32 [[R]]
 196 ;
     ; Negative test: with unmasked amounts %y and 32 - %y, the sub/shl/lshr/or sequence is expected to remain.
 197   %sub = sub i32 32, %y
 198   %shl = shl i32 %x, %y
 199   %shr = lshr i32 %x, %sub
 200   %r = or i32 %shr, %shl
 201   ret i32 %r
 202 }
 203
 204 ; Non-power-of-2 types should follow the same reasoning. Left/right is determined by subtract.
 205
 206 define i37 @rotr_i37(i37 %x, i37 %y) {
 207 ; CHECK-LABEL: @rotr_i37(
 208 ; CHECK-NEXT:    [[SUB:%.*]] = sub i37 37, [[Y:%.*]]
 209 ; CHECK-NEXT:    [[SHL:%.*]] = shl i37 [[X:%.*]], [[SUB]]
 210 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i37 [[X]], [[Y]]
 211 ; CHECK-NEXT:    [[R:%.*]] = or i37 [[SHR]], [[SHL]]
 212 ; CHECK-NEXT:    ret i37 [[R]]
 213 ;
     ; Negative test: the subtract (37 - %y) feeds the shl here; the sequence is expected to be left in place.
 214   %sub = sub i37 37, %y
 215   %shl = shl i37 %x, %sub
 216   %shr = lshr i37 %x, %y
 217   %r = or i37 %shr, %shl
 218   ret i37 %r
 219 }
 220
 221 ; Commute 'or' operands.
 222
 223 define i8 @rotr_i8_commute(i8 %x, i8 %y) {
 224 ; CHECK-LABEL: @rotr_i8_commute(
 225 ; CHECK-NEXT:    [[SUB:%.*]] = sub i8 8, [[Y:%.*]]
 226 ; CHECK-NEXT:    [[SHL:%.*]] = shl i8 [[X:%.*]], [[SUB]]
 227 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i8 [[X]], [[Y]]
 228 ; CHECK-NEXT:    [[R:%.*]] = or i8 [[SHL]], [[SHR]]
 229 ; CHECK-NEXT:    ret i8 [[R]]
 230 ;
     ; Negative test with the shl as the first 'or' operand; the output is expected to match the input.
 231   %sub = sub i8 8, %y
 232   %shl = shl i8 %x, %sub
 233   %shr = lshr i8 %x, %y
 234   %r = or i8 %shl, %shr
 235   ret i8 %r
 236 }
 237
 238 ; Vector types should follow the same rules.
 239
 240 define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %y) {
 241 ; CHECK-LABEL: @rotl_v4i32(
 242 ; CHECK-NEXT:    [[SUB:%.*]] = sub <4 x i32> splat (i32 32), [[Y:%.*]]
 243 ; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y]]
 244 ; CHECK-NEXT:    [[SHR:%.*]] = lshr <4 x i32> [[X]], [[SUB]]
 245 ; CHECK-NEXT:    [[R:%.*]] = or <4 x i32> [[SHL]], [[SHR]]
 246 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 247 ;
     ; Negative test: the ops are expected to stay as-is; only the constant is printed in splat form.
 248   %sub = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y
 249   %shl = shl <4 x i32> %x, %y
 250   %shr = lshr <4 x i32> %x, %sub
 251   %r = or <4 x i32> %shl, %shr
 252   ret <4 x i32> %r
 253 }
 254
 255 ; Non-power-of-2 vector types should follow the same rules.
 256
 257 define <3 x i42> @rotr_v3i42(<3 x i42> %x, <3 x i42> %y) {
 258 ; CHECK-LABEL: @rotr_v3i42(
 259 ; CHECK-NEXT:    [[SUB:%.*]] = sub <3 x i42> splat (i42 42), [[Y:%.*]]
 260 ; CHECK-NEXT:    [[SHL:%.*]] = shl <3 x i42> [[X:%.*]], [[SUB]]
 261 ; CHECK-NEXT:    [[SHR:%.*]] = lshr <3 x i42> [[X]], [[Y]]
 262 ; CHECK-NEXT:    [[R:%.*]] = or <3 x i42> [[SHR]], [[SHL]]
 263 ; CHECK-NEXT:    ret <3 x i42> [[R]]
 264 ;
     ; Negative test on <3 x i42>: the ops are expected to stay; only the constant is printed in splat form.
 265   %sub = sub <3 x i42> <i42 42, i42 42, i42 42>, %y
 266   %shl = shl <3 x i42> %x, %sub
 267   %shr = lshr <3 x i42> %x, %y
 268   %r = or <3 x i42> %shr, %shl
 269   ret <3 x i42> %r
 270 }
 271
 272 ; This is the canonical pattern for a UB-safe rotate-by-variable with power-of-2-size scalar type.
 273 ; The backend expansion of funnel shift for targets that don't have a rotate instruction should
 274 ; match the original IR, so it is always good to canonicalize to the intrinsics for this pattern.
 275
 276 define i32 @rotl_safe_i32(i32 %x, i32 %y) {
 277 ; CHECK-LABEL: @rotl_safe_i32(
 278 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
 279 ; CHECK-NEXT:    ret i32 [[R]]
 280 ;
     ; %y and -%y are both masked with 31 (bitwidth - 1); expect fshl(x, x, y) with the masks and negation gone.
 281   %negy = sub i32 0, %y
 282   %ymask = and i32 %y, 31
 283   %negymask = and i32 %negy, 31
 284   %shl = shl i32 %x, %ymask
 285   %shr = lshr i32 %x, %negymask
 286   %r = or i32 %shr, %shl
 287   ret i32 %r
 288 }
 289
 290 ; Extra uses don't change anything.
 291
 292 define i16 @rotl_safe_i16_commute_extra_use(i16 %x, i16 %y, ptr %p) {
 293 ; CHECK-LABEL: @rotl_safe_i16_commute_extra_use(
 294 ; CHECK-NEXT:    [[NEGY:%.*]] = sub i16 0, [[Y:%.*]]
 295 ; CHECK-NEXT:    [[NEGYMASK:%.*]] = and i16 [[NEGY]], 15
 296 ; CHECK-NEXT:    store i16 [[NEGYMASK]], ptr [[P:%.*]], align 2
 297 ; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[Y]])
 298 ; CHECK-NEXT:    ret i16 [[R]]
 299 ;
     ; %negymask is also stored, so the sub/and feeding the store remain; the rotate itself still becomes fshl.
 300   %negy = sub i16 0, %y
 301   %ymask = and i16 %y, 15
 302   %negymask = and i16 %negy, 15
 303   store i16 %negymask, ptr %p
 304   %shl = shl i16 %x, %ymask
 305   %shr = lshr i16 %x, %negymask
 306   %r = or i16 %shl, %shr
 307   ret i16 %r
 308 }
 309
 310 ; Left/right is determined by the negation.
 311
 312 define i64 @rotr_safe_i64(i64 %x, i64 %y) {
 313 ; CHECK-LABEL: @rotr_safe_i64(
 314 ; CHECK-NEXT:    [[R:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Y:%.*]])
 315 ; CHECK-NEXT:    ret i64 [[R]]
 316 ;
     ; The masked negated amount feeds the shl here, so the expected intrinsic is fshr with amount %y.
 317   %negy = sub i64 0, %y
 318   %ymask = and i64 %y, 63
 319   %negymask = and i64 %negy, 63
 320   %shl = shl i64 %x, %negymask
 321   %shr = lshr i64 %x, %ymask
 322   %r = or i64 %shr, %shl
 323   ret i64 %r
 324 }
 325
 326 ; Negative test - an extra use of a shift (the stored lshr) blocks the fold.
 327
 328 define i8 @rotr_safe_i8_commute_extra_use(i8 %x, i8 %y, ptr %p) {
 329 ; CHECK-LABEL: @rotr_safe_i8_commute_extra_use(
 330 ; CHECK-NEXT:    [[NEGY:%.*]] = sub i8 0, [[Y:%.*]]
 331 ; CHECK-NEXT:    [[YMASK:%.*]] = and i8 [[Y]], 7
 332 ; CHECK-NEXT:    [[NEGYMASK:%.*]] = and i8 [[NEGY]], 7
 333 ; CHECK-NEXT:    [[SHL:%.*]] = shl i8 [[X:%.*]], [[NEGYMASK]]
 334 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i8 [[X]], [[YMASK]]
 335 ; CHECK-NEXT:    store i8 [[SHR]], ptr [[P:%.*]], align 1
 336 ; CHECK-NEXT:    [[R:%.*]] = or i8 [[SHL]], [[SHR]]
 337 ; CHECK-NEXT:    ret i8 [[R]]
 338 ;
     ; %shr is also stored; the CHECK lines expect the whole masked shl/lshr/or pattern to remain (no fshr).
 339   %negy = sub i8 0, %y
 340   %ymask = and i8 %y, 7
 341   %negymask = and i8 %negy, 7
 342   %shl = shl i8 %x, %negymask
 343   %shr = lshr i8 %x, %ymask
 344   store i8 %shr, ptr %p
 345   %r = or i8 %shl, %shr
 346   ret i8 %r
 347 }
 348
 349 ; Vectors follow the same rules.
 350
 351 define <2 x i32> @rotl_safe_v2i32(<2 x i32> %x, <2 x i32> %y) {
 352 ; CHECK-LABEL: @rotl_safe_v2i32(
 353 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> [[Y:%.*]])
 354 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 355 ;
     ; Vector form: both masks are <31, 31> and the negation subtracts from zeroinitializer; expect fshl.v2i32.
 356   %negy = sub <2 x i32> zeroinitializer, %y
 357   %ymask = and <2 x i32> %y, <i32 31, i32 31>
 358   %negymask = and <2 x i32> %negy, <i32 31, i32 31>
 359   %shl = shl <2 x i32> %x, %ymask
 360   %shr = lshr <2 x i32> %x, %negymask
 361   %r = or <2 x i32> %shr, %shl
 362   ret <2 x i32> %r
 363 }
 364
 365 ; Vectors follow the same rules.
 366
 367 define <3 x i16> @rotr_safe_v3i16(<3 x i16> %x, <3 x i16> %y) {
 368 ; CHECK-LABEL: @rotr_safe_v3i16(
 369 ; CHECK-NEXT:    [[R:%.*]] = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> [[X:%.*]], <3 x i16> [[X]], <3 x i16> [[Y:%.*]])
 370 ; CHECK-NEXT:    ret <3 x i16> [[R]]
 371 ;
     ; <3 x i16> with the masked negated amount on the shl; expect fshr.v3i16 with amount %y.
 372   %negy = sub <3 x i16> zeroinitializer, %y
 373   %ymask = and <3 x i16> %y, <i16 15, i16 15, i16 15>
 374   %negymask = and <3 x i16> %negy, <i16 15, i16 15, i16 15>
 375   %shl = shl <3 x i16> %x, %negymask
 376   %shr = lshr <3 x i16> %x, %ymask
 377   %r = or <3 x i16> %shr, %shl
 378   ret <3 x i16> %r
 379 }
 380
 381 ; These are optionally UB-free rotate left/right patterns that are narrowed to a smaller bitwidth.
 382 ; See PR34046, PR16726, and PR39624 for motivating examples:
 383 ; https://bugs.llvm.org/show_bug.cgi?id=34046
 384 ; https://bugs.llvm.org/show_bug.cgi?id=16726
 385 ; https://bugs.llvm.org/show_bug.cgi?id=39624
 386
 387 define i16 @rotate_left_16bit(i16 %v, i32 %shift) {
 388 ; CHECK-LABEL: @rotate_left_16bit(
 389 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i16
 390 ; CHECK-NEXT:    [[CONV2:%.*]] = call i16 @llvm.fshl.i16(i16 [[V:%.*]], i16 [[V]], i16 [[TMP1]])
 391 ; CHECK-NEXT:    ret i16 [[CONV2]]
 392 ;
     ; The rotate is done in i32 on a zext'd i16 and truncated back; expect an i16 fshl with the amount truncated to i16.
 393   %and = and i32 %shift, 15
 394   %conv = zext i16 %v to i32
 395   %shl = shl i32 %conv, %and
 396   %sub = sub i32 16, %and
 397   %shr = lshr i32 %conv, %sub
 398   %or = or i32 %shr, %shl
 399   %conv2 = trunc i32 %or to i16
 400   ret i16 %conv2
 401 }
 402
 403 ; Commute the 'or' operands and try a vector type.
 404
 405 define <2 x i16> @rotate_left_commute_16bit_vec(<2 x i16> %v, <2 x i32> %shift) {
 406 ; CHECK-LABEL: @rotate_left_commute_16bit_vec(
 407 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i32> [[SHIFT:%.*]] to <2 x i16>
 408 ; CHECK-NEXT:    [[CONV2:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[V:%.*]], <2 x i16> [[V]], <2 x i16> [[TMP1]])
 409 ; CHECK-NEXT:    ret <2 x i16> [[CONV2]]
 410 ;
     ; Vector variant with the shl as the first 'or' operand; expect the amount truncated to <2 x i16> feeding fshl.
 411   %and = and <2 x i32> %shift, <i32 15, i32 15>
 412   %conv = zext <2 x i16> %v to <2 x i32>
 413   %shl = shl <2 x i32> %conv, %and
 414   %sub = sub <2 x i32> <i32 16, i32 16>, %and
 415   %shr = lshr <2 x i32> %conv, %sub
 416   %or = or <2 x i32> %shl, %shr
 417   %conv2 = trunc <2 x i32> %or to <2 x i16>
 418   ret <2 x i16> %conv2
 419 }
 420
 421 ; Change the size, rotation direction (the subtract is on the left-shift), and mask op.
 422
 423 define i8 @rotate_right_8bit(i8 %v, i3 %shift) {
 424 ; CHECK-LABEL: @rotate_right_8bit(
 425 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i3 [[SHIFT:%.*]] to i8
 426 ; CHECK-NEXT:    [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
 427 ; CHECK-NEXT:    ret i8 [[CONV2]]
 428 ;
     ; The amount is an i3 zero-extended to i32 (so always < 8) with no 'and'; expect it zext'd to i8 for fshr.
 429   %and = zext i3 %shift to i32
 430   %conv = zext i8 %v to i32
 431   %shr = lshr i32 %conv, %and
 432   %sub = sub i32 8, %and
 433   %shl = shl i32 %conv, %sub
 434   %or = or i32 %shl, %shr
 435   %conv2 = trunc i32 %or to i8
 436   ret i8 %conv2
 437 }
 438
 439 ; The right-shifted value does not need to be a zexted value; here it is masked.
 440 ; The shift mask could be less than the bitwidth, but this is still ok.
 441
 442 define i8 @rotate_right_commute_8bit_unmasked_shl(i32 %v, i32 %shift) {
 443 ; CHECK-LABEL: @rotate_right_commute_8bit_unmasked_shl(
 444 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i8
 445 ; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 3
 446 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[V:%.*]] to i8
 447 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[V]] to i8
 448 ; CHECK-NEXT:    [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP3]], i8 [[TMP4]], i8 [[TMP2]])
 449 ; CHECK-NEXT:    ret i8 [[CONV2]]
 450 ;
     ; Amount is masked with 3 and %v with 255; expect fshr.i8 on two truncs of %v with the 'and 3' redone in i8.
 451   %and = and i32 %shift, 3
 452   %conv = and i32 %v, 255
 453   %shr = lshr i32 %conv, %and
 454   %sub = sub i32 8, %and
 455   %shl = shl i32 %conv, %sub
 456   %or = or i32 %shr, %shl
 457   %conv2 = trunc i32 %or to i8
 458   ret i8 %conv2
 459 }
 460
 461 ; The left-shifted value does not need to be masked at all.
 462
 463 define i8 @rotate_right_commute_8bit(i32 %v, i32 %shift) {
 464 ; CHECK-LABEL: @rotate_right_commute_8bit(
 465 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i8
 466 ; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 3
 467 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[V:%.*]] to i8
 468 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[V]] to i8
 469 ; CHECK-NEXT:    [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP3]], i8 [[TMP4]], i8 [[TMP2]])
 470 ; CHECK-NEXT:    ret i8 [[CONV2]]
 471 ;
     ; Only the lshr input is masked with 255; the shl uses %v directly. Expect fshr.i8 on two truncs of %v.
 472   %and = and i32 %shift, 3
 473   %conv = and i32 %v, 255
 474   %shr = lshr i32 %conv, %and
 475   %sub = sub i32 8, %and
 476   %shl = shl i32 %v, %sub
 477   %or = or i32 %shr, %shl
 478   %conv2 = trunc i32 %or to i8
 479   ret i8 %conv2
 480 }
 481
 482 ; If the original source does not mask the shift amount,
 483 ; we still do the transform by adding masks to make it safe.
 484
 485 define i8 @rotate8_not_safe(i8 %v, i32 %shamt) {
 486 ; CHECK-LABEL: @rotate8_not_safe(
 487 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i8
 488 ; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.fshl.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
 489 ; CHECK-NEXT:    ret i8 [[RET]]
 490 ;
     ; There is no 'and' on %shamt in the input; expect fshl.i8 with the amount simply truncated to i8.
 491   %conv = zext i8 %v to i32
 492   %sub = sub i32 8, %shamt
 493   %shr = lshr i32 %conv, %sub
 494   %shl = shl i32 %conv, %shamt
 495   %or = or i32 %shr, %shl
 496   %ret = trunc i32 %or to i8
 497   ret i8 %ret
 498 }
 499
 500 ; A non-power-of-2 destination type can't be masked as above.
 501
 502 define i9 @rotate9_not_safe(i9 %v, i32 %shamt) {
 503 ; CHECK-LABEL: @rotate9_not_safe(
 504 ; CHECK-NEXT:    [[CONV:%.*]] = zext i9 [[V:%.*]] to i32
 505 ; CHECK-NEXT:    [[SUB:%.*]] = sub i32 9, [[SHAMT:%.*]]
 506 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONV]], [[SUB]]
 507 ; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[CONV]], [[SHAMT]]
 508 ; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
 509 ; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[OR]] to i9
 510 ; CHECK-NEXT:    ret i9 [[RET]]
 511 ;
     ; Negative test: an unmasked rotate narrowed to i9; the CHECK lines expect the input unchanged.
 512   %conv = zext i9 %v to i32
 513   %sub = sub i32 9, %shamt
 514   %shr = lshr i32 %conv, %sub
 515   %shl = shl i32 %conv, %shamt
 516   %or = or i32 %shr, %shl
 517   %ret = trunc i32 %or to i9
 518   ret i9 %ret
 519 }
 520
 521 ; We should narrow (v << (s & 15)) | (v >> (-s & 15))
 522 ; when both v and s have been promoted.
 523
 524 define i16 @rotateleft_16_neg_mask(i16 %v, i16 %shamt) {
 525 ; CHECK-LABEL: @rotateleft_16_neg_mask(
 526 ; CHECK-NEXT:    [[OR:%.*]] = call i16 @llvm.fshl.i16(i16 [[V:%.*]], i16 [[V]], i16 [[SHAMT:%.*]])
 527 ; CHECK-NEXT:    ret i16 [[OR]]
 528 ;
     ; Both amounts are masked in i16 and then zext'd to i32; expect an i16 fshl using %shamt directly.
 529   %neg = sub i16 0, %shamt
 530   %lshamt = and i16 %shamt, 15
 531   %lshamtconv = zext i16 %lshamt to i32
 532   %rshamt = and i16 %neg, 15
 533   %rshamtconv = zext i16 %rshamt to i32
 534   %conv = zext i16 %v to i32
 535   %shl = shl i32 %conv, %lshamtconv
 536   %shr = lshr i32 %conv, %rshamtconv
 537   %or = or i32 %shr, %shl
 538   %ret = trunc i32 %or to i16
 539   ret i16 %ret
 540 }
 541
 542 define i16 @rotateleft_16_neg_mask_commute(i16 %v, i16 %shamt) {
 543 ; CHECK-LABEL: @rotateleft_16_neg_mask_commute(
 544 ; CHECK-NEXT:    [[OR:%.*]] = call i16 @llvm.fshl.i16(i16 [[V:%.*]], i16 [[V]], i16 [[SHAMT:%.*]])
 545 ; CHECK-NEXT:    ret i16 [[OR]]
 546 ;
     ; The shl is the first 'or' operand here; the expected result is still an i16 fshl using %shamt directly.
 547   %neg = sub i16 0, %shamt
 548   %lshamt = and i16 %shamt, 15
 549   %lshamtconv = zext i16 %lshamt to i32
 550   %rshamt = and i16 %neg, 15
 551   %rshamtconv = zext i16 %rshamt to i32
 552   %conv = zext i16 %v to i32
 553   %shl = shl i32 %conv, %lshamtconv
 554   %shr = lshr i32 %conv, %rshamtconv
 555   %or = or i32 %shl, %shr
 556   %ret = trunc i32 %or to i16
 557   ret i16 %ret
 558 }
 559
 560 define i8 @rotateright_8_neg_mask(i8 %v, i8 %shamt) {
 561 ; CHECK-LABEL: @rotateright_8_neg_mask(
 562 ; CHECK-NEXT:    [[OR:%.*]] = call i8 @llvm.fshr.i8(i8 [[V:%.*]], i8 [[V]], i8 [[SHAMT:%.*]])
 563 ; CHECK-NEXT:    ret i8 [[OR]]
 564 ;
     ; The masked negated amount feeds the shl; expect an i8 fshr using %shamt directly.
 565   %neg = sub i8 0, %shamt
 566   %rshamt = and i8 %shamt, 7
 567   %rshamtconv = zext i8 %rshamt to i32
 568   %lshamt = and i8 %neg, 7
 569   %lshamtconv = zext i8 %lshamt to i32
 570   %conv = zext i8 %v to i32
 571   %shl = shl i32 %conv, %lshamtconv
 572   %shr = lshr i32 %conv, %rshamtconv
 573   %or = or i32 %shr, %shl
 574   %ret = trunc i32 %or to i8
 575   ret i8 %ret
 576 }
 577
 578 define i8 @rotateright_8_neg_mask_commute(i8 %v, i8 %shamt) {
 579 ; CHECK-LABEL: @rotateright_8_neg_mask_commute(
 580 ; CHECK-NEXT:    [[OR:%.*]] = call i8 @llvm.fshr.i8(i8 [[V:%.*]], i8 [[V]], i8 [[SHAMT:%.*]])
 581 ; CHECK-NEXT:    ret i8 [[OR]]
 582 ;
     ; The shl is the first 'or' operand here; the expected result is still an i8 fshr using %shamt directly.
 583   %neg = sub i8 0, %shamt
 584   %rshamt = and i8 %shamt, 7
 585   %rshamtconv = zext i8 %rshamt to i32
 586   %lshamt = and i8 %neg, 7
 587   %lshamtconv = zext i8 %lshamt to i32
 588   %conv = zext i8 %v to i32
 589   %shl = shl i32 %conv, %lshamtconv
 590   %shr = lshr i32 %conv, %rshamtconv
 591   %or = or i32 %shl, %shr
 592   %ret = trunc i32 %or to i8
 593   ret i8 %ret
 594 }
 595
 596 ; The shift amount may already be in the wide type,
 597 ; so we need to truncate it going into the rotate pattern.
 598
 599 define i16 @rotateright_16_neg_mask_wide_amount(i16 %v, i32 %shamt) {
 600 ; CHECK-LABEL: @rotateright_16_neg_mask_wide_amount(
 601 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i16
 602 ; CHECK-NEXT:    [[RET:%.*]] = call i16 @llvm.fshr.i16(i16 [[V:%.*]], i16 [[V]], i16 [[TMP1]])
 603 ; CHECK-NEXT:    ret i16 [[RET]]
 604 ;
     ; %shamt is i32 and both amounts are masked with 15 in i32; expect it truncated to i16 feeding fshr.
 605   %neg = sub i32 0, %shamt
 606   %rshamt = and i32 %shamt, 15
 607   %lshamt = and i32 %neg, 15
 608   %conv = zext i16 %v to i32
 609   %shl = shl i32 %conv, %lshamt
 610   %shr = lshr i32 %conv, %rshamt
 611   %or = or i32 %shr, %shl
 612   %ret = trunc i32 %or to i16
 613   ret i16 %ret
 614 }
 615
 616 define i16 @rotateright_16_neg_mask_wide_amount_commute(i16 %v, i32 %shamt) {
 617 ; CHECK-LABEL: @rotateright_16_neg_mask_wide_amount_commute(
 618 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i16
 619 ; CHECK-NEXT:    [[RET:%.*]] = call i16 @llvm.fshr.i16(i16 [[V:%.*]], i16 [[V]], i16 [[TMP1]])
 620 ; CHECK-NEXT:    ret i16 [[RET]]
 621 ;
     ; The shl is the first 'or' operand here; still expect the i32 amount truncated to i16 feeding fshr.
 622   %neg = sub i32 0, %shamt
 623   %rshamt = and i32 %shamt, 15
 624   %lshamt = and i32 %neg, 15
 625   %conv = zext i16 %v to i32
 626   %shl = shl i32 %conv, %lshamt
 627   %shr = lshr i32 %conv, %rshamt
 628   %or = or i32 %shl, %shr
 629   %ret = trunc i32 %or to i16
 630   ret i16 %ret
 631 }
 632
 633 define i64 @rotateright_64_zext_neg_mask_amount(i64 %0, i32 %1) {
 634 ; CHECK-LABEL: @rotateright_64_zext_neg_mask_amount(
 635 ; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP1:%.*]] to i64
 636 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP3]])
 637 ; CHECK-NEXT:    ret i64 [[TMP4]]
 638 ;
     ; The i32 amount and its negation are masked with 63 and zext'd to i64; expect one zext of %1 feeding fshr.i64.
 639   %3 = and i32 %1, 63
 640   %4 = zext i32 %3 to i64
 641   %5 = lshr i64 %0, %4
 642   %6 = sub nsw i32 0, %1
 643   %7 = and i32 %6, 63
 644   %8 = zext i32 %7 to i64
 645   %9 = shl i64 %0, %8
 646   %10 = or i64 %5, %9
 647   ret i64 %10
 648 }
 649
 650 define i8 @rotateleft_8_neg_mask_wide_amount(i8 %v, i32 %shamt) {
 651 ; CHECK-LABEL: @rotateleft_8_neg_mask_wide_amount(
 652 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i8
 653 ; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.fshl.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
 654 ; CHECK-NEXT:    ret i8 [[RET]]
 655 ;
     ; The i32 amount and its negation are masked with 7; expect the amount truncated to i8 feeding fshl.
 656   %neg = sub i32 0, %shamt
 657   %lshamt = and i32 %shamt, 7
 658   %rshamt = and i32 %neg, 7
 659   %conv = zext i8 %v to i32
 660   %shl = shl i32 %conv, %lshamt
 661   %shr = lshr i32 %conv, %rshamt
 662   %or = or i32 %shr, %shl
 663   %ret = trunc i32 %or to i8
 664   ret i8 %ret
 665 }
 666
 667 define i8 @rotateleft_8_neg_mask_wide_amount_commute(i8 %v, i32 %shamt) {
 668 ; CHECK-LABEL: @rotateleft_8_neg_mask_wide_amount_commute(
 669 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i8
 670 ; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.fshl.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
 671 ; CHECK-NEXT:    ret i8 [[RET]]
 672 ;
     ; The shl is the first 'or' operand here; still expect the i32 amount truncated to i8 feeding fshl.
 673   %neg = sub i32 0, %shamt
 674   %lshamt = and i32 %shamt, 7
 675   %rshamt = and i32 %neg, 7
 676   %conv = zext i8 %v to i32
 677   %shl = shl i32 %conv, %lshamt
 678   %shr = lshr i32 %conv, %rshamt
 679   %or = or i32 %shl, %shr
 680   %ret = trunc i32 %or to i8
 681   ret i8 %ret
 682 }
 683
 684 define i64 @rotateleft_64_zext_neg_mask_amount(i64 %0, i32 %1) {
 685 ; CHECK-LABEL: @rotateleft_64_zext_neg_mask_amount(
 686 ; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP1:%.*]] to i64
 687 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.fshl.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP3]])
 688 ; CHECK-NEXT:    ret i64 [[TMP4]]
 689 ;
     ; The i32 amount and its negation are masked with 63 and zext'd to i64; expect one zext of %1 feeding fshl.i64.
 690   %3 = and i32 %1, 63
 691   %4 = zext i32 %3 to i64
 692   %5 = shl i64 %0, %4
 693   %6 = sub nsw i32 0, %1
 694   %7 = and i32 %6, 63
 695   %8 = zext i32 %7 to i64
 696   %9 = lshr i64 %0, %8
 697   %10 = or i64 %5, %9
 698   ret i64 %10
 699 }
 700
 701 ; Non-power-of-2 types. This could be transformed, but it's not a typical rotate pattern.
 702
 703 define i9 @rotateleft_9_neg_mask_wide_amount_commute(i9 %v, i33 %shamt) {
 704 ; CHECK-LABEL: @rotateleft_9_neg_mask_wide_amount_commute(
 705 ; CHECK-NEXT:    [[NEG:%.*]] = sub i33 0, [[SHAMT:%.*]]
 706 ; CHECK-NEXT:    [[LSHAMT:%.*]] = and i33 [[SHAMT]], 8
 707 ; CHECK-NEXT:    [[RSHAMT:%.*]] = and i33 [[NEG]], 8
 708 ; CHECK-NEXT:    [[CONV:%.*]] = zext i9 [[V:%.*]] to i33
 709 ; CHECK-NEXT:    [[SHL:%.*]] = shl nuw nsw i33 [[CONV]], [[LSHAMT]]
 710 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i33 [[CONV]], [[RSHAMT]]
 711 ; CHECK-NEXT:    [[OR:%.*]] = or i33 [[SHL]], [[SHR]]
 712 ; CHECK-NEXT:    [[RET:%.*]] = trunc i33 [[OR]] to i9
 713 ; CHECK-NEXT:    ret i9 [[RET]]
 714 ;
     ; Negative test: both masks are the constant 8; the only expected change is nuw/nsw added to the shl.
 715   %neg = sub i33 0, %shamt
 716   %lshamt = and i33 %shamt, 8
 717   %rshamt = and i33 %neg, 8
 718   %conv = zext i9 %v to i33
 719   %shl = shl i33 %conv, %lshamt
 720   %shr = lshr i33 %conv, %rshamt
 721   %or = or i33 %shl, %shr
 722   %ret = trunc i33 %or to i9
 723   ret i9 %ret
 724 }
 725
 726 ; Fold or(shl(v,x),lshr(v,bw-x)) iff x < bw
 727
 728 define i64 @rotl_sub_mask(i64 %0, i64 %1) {
 729 ; CHECK-LABEL: @rotl_sub_mask(
 730 ; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.fshl.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP1:%.*]])
 731 ; CHECK-NEXT:    ret i64 [[TMP3]]
 732 ;
     ; The lshr amount is 64 - (%1 & 63) rather than a masked negation; expect fshl.i64 with amount %1.
 733   %3 = and i64 %1, 63
 734   %4 = shl i64 %0, %3
 735   %5 = sub nuw nsw i64 64, %3
 736   %6 = lshr i64 %0, %5
 737   %7 = or i64 %6, %4
 738   ret i64 %7
 739 }
 740
 741 ; Fold or(lshr(v,x),shl(v,bw-x)) iff x < bw
 742
 743 define i64 @rotr_sub_mask(i64 %0, i64 %1) {
 744 ; CHECK-LABEL: @rotr_sub_mask(
 745 ; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP1:%.*]])
 746 ; CHECK-NEXT:    ret i64 [[TMP3]]
 747 ;
     ; 64 - (%1 & 63) feeds the shl and the masked amount feeds the lshr; expect fshr.i64 with amount %1.
 748   %3 = and i64 %1, 63
 749   %4 = lshr i64 %0, %3
 750   %5 = sub nuw nsw i64 64, %3
 751   %6 = shl i64 %0, %5
 752   %7 = or i64 %6, %4
 753   ret i64 %7
 754 }
 755
 756 define <2 x i64> @rotr_sub_mask_vector(<2 x i64> %0, <2 x i64> %1) {
 757 ; CHECK-LABEL: @rotr_sub_mask_vector(
 758 ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> [[TMP0:%.*]], <2 x i64> [[TMP0]], <2 x i64> [[TMP1:%.*]])
 759 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 760 ;
     ; <2 x i64> version: the shl amount is <64, 64> minus the masked amount; expect fshr.v2i64 with amount %1.
 761   %3 = and <2 x i64> %1, <i64 63, i64 63>
 762   %4 = lshr <2 x i64> %0, %3
 763   %5 = sub nuw nsw <2 x i64> <i64 64, i64 64>, %3
 764   %6 = shl <2 x i64> %0, %5
 765   %7 = or <2 x i64> %6, %4
 766   ret <2 x i64> %7
 767 }
 768
 769 ; Convert select pattern to masked shift that ends in 'or'.
 770
 771 define i32 @rotr_select(i32 %x, i32 %shamt) {
 772 ; CHECK-LABEL: @rotr_select(
 773 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[SHAMT:%.*]])
 774 ; CHECK-NEXT:    ret i32 [[R]]
 775 ;
     ; The select returns %x when %shamt == 0 (where the shl amount would be 32); expect icmp/select folded into fshr.
 776   %cmp = icmp eq i32 %shamt, 0
 777   %sub = sub i32 32, %shamt
 778   %shr = lshr i32 %x, %shamt
 779   %shl = shl i32 %x, %sub
 780   %or = or i32 %shr, %shl
 781   %r = select i1 %cmp, i32 %x, i32 %or
 782   ret i32 %r
 783 }
 784
 785 ; Convert select pattern to masked shift that ends in 'or'.
 786
 787 define i8 @rotr_select_commute(i8 %x, i8 %shamt) {
 788 ; CHECK-LABEL: @rotr_select_commute(
 789 ; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.fshr.i8(i8 [[X:%.*]], i8 [[X]], i8 [[SHAMT:%.*]])
 790 ; CHECK-NEXT:    ret i8 [[R]]
 791 ;
     ; i8 version with the shl as the first 'or' operand; expect the icmp/select folded into fshr.i8.
 792   %cmp = icmp eq i8 %shamt, 0
 793   %sub = sub i8 8, %shamt
 794   %shr = lshr i8 %x, %shamt
 795   %shl = shl i8 %x, %sub
 796   %or = or i8 %shl, %shr
 797   %r = select i1 %cmp, i8 %x, i8 %or
 798   ret i8 %r
 799 }
 800
 801 ; Convert select pattern to masked shift that ends in 'or'.
 802
 803 define i16 @rotl_select(i16 %x, i16 %shamt) {
 804 ; CHECK-LABEL: @rotl_select(
 805 ; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[SHAMT:%.*]])
 806 ; CHECK-NEXT:    ret i16 [[R]]
 807 ;
     ; The subtract (16 - %shamt) feeds the lshr here, so the expected intrinsic is fshl.
 808   %cmp = icmp eq i16 %shamt, 0
 809   %sub = sub i16 16, %shamt
 810   %shr = lshr i16 %x, %sub
 811   %shl = shl i16 %x, %shamt
 812   %or = or i16 %shr, %shl
 813   %r = select i1 %cmp, i16 %x, i16 %or
 814   ret i16 %r
 815 }
 816
 817 ; Convert select pattern to masked shift that ends in 'or'.
 818
 819 define <2 x i64> @rotl_select_commute(<2 x i64> %x, <2 x i64> %shamt) {
 820 ; CHECK-LABEL: @rotl_select_commute(
 821 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[X:%.*]], <2 x i64> [[X]], <2 x i64> [[SHAMT:%.*]])
 822 ; CHECK-NEXT:    ret <2 x i64> [[R]]
 823 ;
     ; Vector compare against zeroinitializer with a <2 x i1> select condition; expect fshl.v2i64.
 824   %cmp = icmp eq <2 x i64> %shamt, zeroinitializer
 825   %sub = sub <2 x i64> <i64 64, i64 64>, %shamt
 826   %shr = lshr <2 x i64> %x, %sub
 827   %shl = shl <2 x i64> %x, %shamt
 828   %or = or <2 x i64> %shl, %shr
 829   %r = select <2 x i1> %cmp, <2 x i64> %x, <2 x i64> %or
 830   ret <2 x i64> %r
 831 }
 832
 833 ; Negative test - the transform is only valid with power-of-2 types.
 834
 835 define i24 @rotl_select_weird_type(i24 %x, i24 %shamt) { ; same select-guarded rotate-left shape, but on i24 (bit width is not a power of 2)
 836 ; CHECK-LABEL: @rotl_select_weird_type(
 837 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i24 [[SHAMT:%.*]], 0
 838 ; CHECK-NEXT:    [[SUB:%.*]] = sub i24 24, [[SHAMT]]
 839 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i24 [[X:%.*]], [[SUB]]
 840 ; CHECK-NEXT:    [[SHL:%.*]] = shl i24 [[X]], [[SHAMT]]
 841 ; CHECK-NEXT:    [[OR:%.*]] = or i24 [[SHL]], [[SHR]]
 842 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i24 [[X]], i24 [[OR]]
 843 ; CHECK-NEXT:    ret i24 [[R]]
 844 ;
 845   %cmp = icmp eq i24 %shamt, 0 ; true when no rotation is requested
 846   %sub = sub i24 24, %shamt ; complementary amount: bit width (24) minus %shamt
 847   %shr = lshr i24 %x, %sub ; part that wraps around into the low bits
 848   %shl = shl i24 %x, %shamt ; left-shifted part of the rotate
 849   %or = or i24 %shl, %shr ; combine the two parts
 850   %r = select i1 %cmp, i24 %x, i24 %or ; expected output keeps this select: the sequence must survive unchanged
 851   ret i24 %r
 852 }
 853
 854 define i32 @rotl_select_zext_shamt(i32 %x, i8 %y) { ; select-guarded i32 rotate-left whose amount is computed in i8 and then zero-extended
 855 ; CHECK-LABEL: @rotl_select_zext_shamt(
 856 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[Y:%.*]] to i32
 857 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[TMP1]])
 858 ; CHECK-NEXT:    ret i32 [[R]]
 859 ;
 860   %rem = and i8 %y, 31 ; rotate amount reduced to 0..31 in the narrow type
 861   %cmp = icmp eq i8 %rem, 0 ; true when no rotation is requested
 862   %sh_prom = zext i8 %rem to i32 ; widen the amount to the type of %x
 863   %sub = sub nuw nsw i8 32, %rem ; complementary amount, still in i8
 864   %sh_prom1 = zext i8 %sub to i32 ; widen the complementary amount
 865   %shr = lshr i32 %x, %sh_prom1 ; part that wraps around into the low bits
 866   %shl = shl i32 %x, %sh_prom ; left-shifted part of the rotate
 867   %or = or i32 %shl, %shr ; combine the two parts
 868   %r = select i1 %cmp, i32 %x, i32 %or ; %x is returned as-is when %rem is 0, where %sub would equal the bit width
 869   ret i32 %r
 870 }
 871
 872 define i64 @rotr_select_zext_shamt(i64 %x, i32 %y) { ; select-guarded i64 rotate-right whose amount is computed in i32 and then zero-extended
 873 ; CHECK-LABEL: @rotr_select_zext_shamt(
 874 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[Y:%.*]] to i64
 875 ; CHECK-NEXT:    [[R:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[TMP1]])
 876 ; CHECK-NEXT:    ret i64 [[R]]
 877 ;
 878   %rem = and i32 %y, 63 ; rotate amount reduced to 0..63 in the narrow type
 879   %cmp = icmp eq i32 %rem, 0 ; true when no rotation is requested
 880   %sh_prom = zext i32 %rem to i64 ; widen the amount to the type of %x
 881   %shr = lshr i64 %x, %sh_prom ; right-shifted part of the rotate
 882   %sub = sub nuw nsw i32 64, %rem ; complementary amount, still in i32
 883   %sh_prom1 = zext i32 %sub to i64 ; widen the complementary amount
 884   %shl = shl i64 %x, %sh_prom1 ; part that wraps around into the high bits
 885   %or = or i64 %shl, %shr ; combine the two parts
 886   %r = select i1 %cmp, i64 %x, i64 %or ; %x is returned as-is when %rem is 0, where %sub would equal the bit width
 887   ret i64 %r
 888 }
 889
 890 ; Test that the transform doesn't crash when there's an "or" with a ConstantExpr operand.
 891
 892 @external_global = external global i8 ; its address, via ptrtoint, is the shifted value in the function below
 893
 894 define i32 @rotl_constant_expr(i32 %shamt) { ; both shifts take the same ptrtoint constant expression as the shifted value
 895 ; CHECK-LABEL: @rotl_constant_expr(
 896 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 ptrtoint (ptr @external_global to i32), [[SHAMT:%.*]]
 897 ; CHECK-NEXT:    [[SHL:%.*]] = shl i32 ptrtoint (ptr @external_global to i32), 11
 898 ; CHECK-NEXT:    [[R:%.*]] = or i32 [[SHR]], [[SHL]]
 899 ; CHECK-NEXT:    ret i32 [[R]]
 900 ;
 901   %shr = lshr i32 ptrtoint (ptr @external_global to i32), %shamt ; variable shift amount
 902   %shl = shl i32 ptrtoint (ptr @external_global to i32), 11 ; constant shift amount, unrelated to %shamt
 903   %r = or i32 %shr, %shl ; expected output keeps all three instructions as written
 904   ret i32 %r
 905 }
 906
 907 ; PR20750 - https://bugs.llvm.org/show_bug.cgi?id=20750
 908 ; This IR corresponds to C source where the shift amount is a smaller type than the rotated value:
 909 ; unsigned int rotate32_doubleand1(unsigned int v, unsigned char r) { r = r & 31; return (v << r) | (v >> (((32 - r)) & 31)); }
 910
 911 define i32 @rotateleft32_doubleand1(i32 %v, i8 %r) { ; i32 rotate-left by an i8 amount, with both shift amounts masked to 0..31
 912 ; CHECK-LABEL: @rotateleft32_doubleand1(
 913 ; CHECK-NEXT:    [[Z:%.*]] = zext i8 [[R:%.*]] to i32
 914 ; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[V:%.*]], i32 [[V]], i32 [[Z]])
 915 ; CHECK-NEXT:    ret i32 [[OR]]
 916 ;
 917   %m = and i8 %r, 31 ; first mask: amount reduced to 0..31 in the narrow type
 918   %z = zext i8 %m to i32 ; widen the amount to the type of %v
 919   %neg = sub nsw i32 0, %z ; negated amount
 920   %and2 = and i32 %neg, 31 ; second mask: (-%z) & 31 is the complementary amount modulo 32
 921   %shl = shl i32 %v, %z ; left-shifted part of the rotate
 922   %shr = lshr i32 %v, %and2 ; part that wraps around into the low bits
 923   %or = or i32 %shr, %shl ; combine the two parts
 924   ret i32 %or
 925 }
 926
 927 define i32 @rotateright32_doubleand1(i32 %v, i16 %r) { ; i32 rotate-right by an i16 amount, with both shift amounts masked to 0..31
 928 ; CHECK-LABEL: @rotateright32_doubleand1(
 929 ; CHECK-NEXT:    [[Z:%.*]] = zext i16 [[R:%.*]] to i32
 930 ; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.fshr.i32(i32 [[V:%.*]], i32 [[V]], i32 [[Z]])
 931 ; CHECK-NEXT:    ret i32 [[OR]]
 932 ;
 933   %m = and i16 %r, 31 ; first mask: amount reduced to 0..31 in the narrow type
 934   %z = zext i16 %m to i32 ; widen the amount to the type of %v
 935   %neg = sub nsw i32 0, %z ; negated amount
 936   %and2 = and i32 %neg, 31 ; second mask: (-%z) & 31 is the complementary amount modulo 32
 937   %shl = shl i32 %v, %and2 ; part that wraps around into the high bits
 938   %shr = lshr i32 %v, %z ; right-shifted part of the rotate
 939   %or = or i32 %shr, %shl ; combine the two parts
 940   ret i32 %or
 941 }
 942
 943 ; TODO: This should be a rotate (funnel-shift).
 944
 945 define i8 @unmasked_shlop_unmasked_shift_amount(i32 %x, i32 %shamt) { ; 8-bit rotate-right shape computed in i32 and truncated; neither the shl operand nor the amounts are masked
 946 ; CHECK-LABEL: @unmasked_shlop_unmasked_shift_amount(
 947 ; CHECK-NEXT:    [[MASKX:%.*]] = and i32 [[X:%.*]], 255
 948 ; CHECK-NEXT:    [[T4:%.*]] = sub i32 8, [[SHAMT:%.*]]
 949 ; CHECK-NEXT:    [[T5:%.*]] = shl i32 [[X]], [[T4]]
 950 ; CHECK-NEXT:    [[T6:%.*]] = lshr i32 [[MASKX]], [[SHAMT]]
 951 ; CHECK-NEXT:    [[T7:%.*]] = or i32 [[T5]], [[T6]]
 952 ; CHECK-NEXT:    [[T8:%.*]] = trunc i32 [[T7]] to i8
 953 ; CHECK-NEXT:    ret i8 [[T8]]
 954 ;
 955   %maskx = and i32 %x, 255 ; keep only the low 8 bits of %x
 956   %t4 = sub i32 8, %shamt ; complementary amount relative to 8 bits; %shamt itself is not masked
 957   %t5 = shl i32 %x, %t4 ; shifts %x directly rather than %maskx
 958   %t6 = lshr i32 %maskx, %shamt ; right shift of the masked value
 959   %t7 = or i32 %t5, %t6 ; combine the two parts in the wide type
 960   %t8 = trunc i32 %t7 to i8 ; only the low 8 bits are returned
 961   ret i8 %t8
 962 }
 963
 964 define i16 @check_rotate_masked_16bit(i8 %shamt, i32 %cond) { ; rotate-right shape computed in i32 with amounts masked to 0..15, then truncated to i16
 965 ; CHECK-LABEL: @check_rotate_masked_16bit(
 966 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[SHAMT:%.*]] to i16
 967 ; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[COND:%.*]] to i16
 968 ; CHECK-NEXT:    [[TMP3:%.*]] = and i16 [[TMP2]], 1
 969 ; CHECK-NEXT:    [[TRUNC:%.*]] = call i16 @llvm.fshr.i16(i16 [[TMP3]], i16 [[TMP3]], i16 [[TMP1]])
 970 ; CHECK-NEXT:    ret i16 [[TRUNC]]
 971 ;
 972   %maskx = and i32 %cond, 1 ; rotated value: only bit 0 of %cond can be set
 973   %masky = and i8 %shamt, 15 ; right-shift amount reduced to 0..15
 974   %z = zext i8 %masky to i32 ; widen the amount to i32
 975   %shr = lshr i32 %maskx, %z ; right-shifted part of the rotate
 976   %sub = sub i8 0, %shamt ; negated amount in the narrow type
 977   %maskw = and i8 %sub, 15 ; (-%shamt) & 15 is the complementary amount modulo 16
 978   %z2 = zext i8 %maskw to i32 ; widen the complementary amount to i32
 979   %shl = shl nuw nsw i32 %maskx, %z2 ; left-shifted part of the rotate
 980   %or = or i32 %shr, %shl ; combine the two parts in the wide type
 981   %trunc = trunc i32 %or to i16 ; only the low 16 bits are returned
 982   ret i16 %trunc
 983 }