llvm/test/Transforms/InstCombine/rotate.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -instcombine -S | FileCheck %s
   3
   4 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
   5
   6 ; Canonicalize rotate by constant to funnel shift intrinsics.
   7 ; This should help cost modeling for vectorization, inlining, etc.
   8 ; If a target does not have a rotate instruction, the expansion will
   9 ; be exactly these same 3 basic ops (shl/lshr/or).
  10
  11 define i32 @rotl_i32_constant(i32 %x) {
  12 ; CHECK-LABEL: @rotl_i32_constant(
  13 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 11)
  14 ; CHECK-NEXT:    ret i32 [[R]]
  15 ;
     ; Input is (x >> 21) | (x << 11); the two amounts sum to the bit width, 32.
  16   %shl = shl i32 %x, 11
  17   %shr = lshr i32 %x, 21
  18   %r = or i32 %shr, %shl
  19   ret i32 %r
  20 }
  21
  22 define i42 @rotr_i42_constant(i42 %x) {
  23 ; CHECK-LABEL: @rotr_i42_constant(
  24 ; CHECK-NEXT:    [[R:%.*]] = call i42 @llvm.fshl.i42(i42 [[X:%.*]], i42 [[X]], i42 31)
  25 ; CHECK-NEXT:    ret i42 [[R]]
  26 ;
     ; Input is (x >> 11) | (x << 31) on i42 (11 + 31 == 42); the checks expect fshl by 31.
  27   %shl = shl i42 %x, 31
  28   %shr = lshr i42 %x, 11
  29   %r = or i42 %shr, %shl
  30   ret i42 %r
  31 }
  32
  33 define i8 @rotr_i8_constant_commute(i8 %x) {
  34 ; CHECK-LABEL: @rotr_i8_constant_commute(
  35 ; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[X]], i8 5)
  36 ; CHECK-NEXT:    ret i8 [[R]]
  37 ;
     ; Input is (x << 5) | (x >> 3) on i8 (5 + 3 == 8), with the shl as the first 'or' operand.
  38   %shl = shl i8 %x, 5
  39   %shr = lshr i8 %x, 3
  40   %r = or i8 %shl, %shr
  41   ret i8 %r
  42 }
  43
  44 define i88 @rotl_i88_constant_commute(i88 %x) {
  45 ; CHECK-LABEL: @rotl_i88_constant_commute(
  46 ; CHECK-NEXT:    [[R:%.*]] = call i88 @llvm.fshl.i88(i88 [[X:%.*]], i88 [[X]], i88 44)
  47 ; CHECK-NEXT:    ret i88 [[R]]
  48 ;
     ; Both shifts are by 44, exactly half of the 88-bit width.
  49   %shl = shl i88 %x, 44
  50   %shr = lshr i88 %x, 44
  51   %r = or i88 %shl, %shr
  52   ret i88 %r
  53 }
  54
  55 ; Vector types are allowed.
  56
  57 define <2 x i16> @rotl_v2i16_constant_splat(<2 x i16> %x) {
  58 ; CHECK-LABEL: @rotl_v2i16_constant_splat(
  59 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> <i16 1, i16 1>)
  60 ; CHECK-NEXT:    ret <2 x i16> [[R]]
  61 ;
     ; Splat amounts: shl by 1 and lshr by 15 in every i16 lane (1 + 15 == 16).
  62   %shl = shl <2 x i16> %x, <i16 1, i16 1>
  63   %shr = lshr <2 x i16> %x, <i16 15, i16 15>
  64   %r = or <2 x i16> %shl, %shr
  65   ret <2 x i16> %r
  66 }
  67
  68 define <2 x i16> @rotl_v2i16_constant_splat_undef0(<2 x i16> %x) {
  69 ; CHECK-LABEL: @rotl_v2i16_constant_splat_undef0(
  70 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> <i16 1, i16 1>)
  71 ; CHECK-NEXT:    ret <2 x i16> [[R]]
  72 ;
     ; Lane 0 of the shl amount is undef; the expected fshl amount is still the full splat <1, 1>.
  73   %shl = shl <2 x i16> %x, <i16 undef, i16 1>
  74   %shr = lshr <2 x i16> %x, <i16 15, i16 15>
  75   %r = or <2 x i16> %shl, %shr
  76   ret <2 x i16> %r
  77 }
  78
  79 define <2 x i16> @rotl_v2i16_constant_splat_undef1(<2 x i16> %x) {
  80 ; CHECK-LABEL: @rotl_v2i16_constant_splat_undef1(
  81 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> <i16 1, i16 1>)
  82 ; CHECK-NEXT:    ret <2 x i16> [[R]]
  83 ;
     ; Lane 1 of the lshr amount is undef; the expected fshl amount is still the full splat <1, 1>.
  84   %shl = shl <2 x i16> %x, <i16 1, i16 1>
  85   %shr = lshr <2 x i16> %x, <i16 15, i16 undef>
  86   %r = or <2 x i16> %shl, %shr
  87   ret <2 x i16> %r
  88 }
  89
  90 ; Non-power-of-2 vector types are allowed.
  91
  92 define <2 x i17> @rotr_v2i17_constant_splat(<2 x i17> %x) {
  93 ; CHECK-LABEL: @rotr_v2i17_constant_splat(
  94 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> <i17 12, i17 12>)
  95 ; CHECK-NEXT:    ret <2 x i17> [[R]]
  96 ;
     ; Per lane: shl by 12 and lshr by 5 on i17 (12 + 5 == 17).
  97   %shl = shl <2 x i17> %x, <i17 12, i17 12>
  98   %shr = lshr <2 x i17> %x, <i17 5, i17 5>
  99   %r = or <2 x i17> %shr, %shl
 100   ret <2 x i17> %r
 101 }
 102
 103 define <2 x i17> @rotr_v2i17_constant_splat_undef0(<2 x i17> %x) {
 104 ; CHECK-LABEL: @rotr_v2i17_constant_splat_undef0(
 105 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> <i17 12, i17 12>)
 106 ; CHECK-NEXT:    ret <2 x i17> [[R]]
 107 ;
     ; The undef lanes differ: lane 1 of the shl amount and lane 0 of the lshr amount.
 108   %shl = shl <2 x i17> %x, <i17 12, i17 undef>
 109   %shr = lshr <2 x i17> %x, <i17 undef, i17 5>
 110   %r = or <2 x i17> %shr, %shl
 111   ret <2 x i17> %r
 112 }
 113
 114 define <2 x i17> @rotr_v2i17_constant_splat_undef1(<2 x i17> %x) {
 115 ; CHECK-LABEL: @rotr_v2i17_constant_splat_undef1(
 116 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> <i17 12, i17 12>)
 117 ; CHECK-NEXT:    ret <2 x i17> [[R]]
 118 ;
     ; Lane 1 is undef in both the shl and the lshr amounts.
 119   %shl = shl <2 x i17> %x, <i17 12, i17 undef>
 120   %shr = lshr <2 x i17> %x, <i17 5, i17 undef>
 121   %r = or <2 x i17> %shr, %shl
 122   ret <2 x i17> %r
 123 }
 124
 125 ; Allow arbitrary shift constants.
 126 ; Support undef elements.
 127
 128 define <2 x i32> @rotr_v2i32_constant_nonsplat(<2 x i32> %x) {
 129 ; CHECK-LABEL: @rotr_v2i32_constant_nonsplat(
 130 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 17, i32 19>)
 131 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 132 ;
     ; Different amounts per lane: 17 + 15 == 32 in lane 0 and 19 + 13 == 32 in lane 1.
 133   %shl = shl <2 x i32> %x, <i32 17, i32 19>
 134   %shr = lshr <2 x i32> %x, <i32 15, i32 13>
 135   %r = or <2 x i32> %shl, %shr
 136   ret <2 x i32> %r
 137 }
 138
 139 define <2 x i32> @rotr_v2i32_constant_nonsplat_undef0(<2 x i32> %x) {
 140 ; CHECK-LABEL: @rotr_v2i32_constant_nonsplat_undef0(
 141 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 0, i32 19>)
 142 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 143 ;
     ; Lane 0 of the shl amount is undef; the expected fshl amount for that lane is 0.
 144   %shl = shl <2 x i32> %x, <i32 undef, i32 19>
 145   %shr = lshr <2 x i32> %x, <i32 15, i32 13>
 146   %r = or <2 x i32> %shl, %shr
 147   ret <2 x i32> %r
 148 }
 149
 150 define <2 x i32> @rotr_v2i32_constant_nonsplat_undef1(<2 x i32> %x) {
 151 ; CHECK-LABEL: @rotr_v2i32_constant_nonsplat_undef1(
 152 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 17, i32 0>)
 153 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 154 ;
     ; Lane 1 of the lshr amount is undef; the expected fshl amount for that lane is 0.
 155   %shl = shl <2 x i32> %x, <i32 17, i32 19>
 156   %shr = lshr <2 x i32> %x, <i32 15, i32 undef>
 157   %r = or <2 x i32> %shl, %shr
 158   ret <2 x i32> %r
 159 }
 160
 161 define <2 x i36> @rotl_v2i36_constant_nonsplat(<2 x i36> %x) {
 162 ; CHECK-LABEL: @rotl_v2i36_constant_nonsplat(
 163 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i36> @llvm.fshl.v2i36(<2 x i36> [[X:%.*]], <2 x i36> [[X]], <2 x i36> <i36 21, i36 11>)
 164 ; CHECK-NEXT:    ret <2 x i36> [[R]]
 165 ;
     ; Per-lane amounts on i36: 21 + 15 == 36 in lane 0 and 11 + 25 == 36 in lane 1.
 166   %shl = shl <2 x i36> %x, <i36 21, i36 11>
 167   %shr = lshr <2 x i36> %x, <i36 15, i36 25>
 168   %r = or <2 x i36> %shl, %shr
 169   ret <2 x i36> %r
 170 }
 171
 172 define <3 x i36> @rotl_v3i36_constant_nonsplat_undef0(<3 x i36> %x) {
 173 ; CHECK-LABEL: @rotl_v3i36_constant_nonsplat_undef0(
 174 ; CHECK-NEXT:    [[R:%.*]] = call <3 x i36> @llvm.fshl.v3i36(<3 x i36> [[X:%.*]], <3 x i36> [[X]], <3 x i36> <i36 21, i36 11, i36 0>)
 175 ; CHECK-NEXT:    ret <3 x i36> [[R]]
 176 ;
     ; Lane 2 is undef in both shift amounts; the expected fshl amount for that lane is 0.
 177   %shl = shl <3 x i36> %x, <i36 21, i36 11, i36 undef>
 178   %shr = lshr <3 x i36> %x, <i36 15, i36 25, i36 undef>
 179   %r = or <3 x i36> %shl, %shr
 180   ret <3 x i36> %r
 181 }
 182
 183 ; The most basic rotate by variable - no guards for UB due to oversized shifts.
 184 ; This cannot be canonicalized to funnel shift target-independently. The safe
 185 ; expansion includes masking for the shift amount that is not included here,
 186 ; so it could be more expensive.
 187
 188 define i32 @rotl_i32(i32 %x, i32 %y) {
 189 ; CHECK-LABEL: @rotl_i32(
 190 ; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[Y:%.*]]
 191 ; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y]]
 192 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X]], [[SUB]]
 193 ; CHECK-NEXT:    [[R:%.*]] = or i32 [[SHR]], [[SHL]]
 194 ; CHECK-NEXT:    ret i32 [[R]]
 195 ;
     ; The amounts are %y (shl) and 32 - %y (lshr) with no masking; the checks expect the IR unchanged.
 196   %sub = sub i32 32, %y
 197   %shl = shl i32 %x, %y
 198   %shr = lshr i32 %x, %sub
 199   %r = or i32 %shr, %shl
 200   ret i32 %r
 201 }
 202
 203 ; Non-power-of-2 types should follow the same reasoning. Left/right is determined by subtract.
 204
 205 define i37 @rotr_i37(i37 %x, i37 %y) {
 206 ; CHECK-LABEL: @rotr_i37(
 207 ; CHECK-NEXT:    [[SUB:%.*]] = sub i37 37, [[Y:%.*]]
 208 ; CHECK-NEXT:    [[SHL:%.*]] = shl i37 [[X:%.*]], [[SUB]]
 209 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i37 [[X]], [[Y]]
 210 ; CHECK-NEXT:    [[R:%.*]] = or i37 [[SHR]], [[SHL]]
 211 ; CHECK-NEXT:    ret i37 [[R]]
 212 ;
     ; 37 - %y feeds the shl and %y feeds the lshr; the checks expect no funnel shift.
 213   %sub = sub i37 37, %y
 214   %shl = shl i37 %x, %sub
 215   %shr = lshr i37 %x, %y
 216   %r = or i37 %shr, %shl
 217   ret i37 %r
 218 }
 219
 220 ; Commute 'or' operands.
 221
 222 define i8 @rotr_i8_commute(i8 %x, i8 %y) {
 223 ; CHECK-LABEL: @rotr_i8_commute(
 224 ; CHECK-NEXT:    [[SUB:%.*]] = sub i8 8, [[Y:%.*]]
 225 ; CHECK-NEXT:    [[SHL:%.*]] = shl i8 [[X:%.*]], [[SUB]]
 226 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i8 [[X]], [[Y]]
 227 ; CHECK-NEXT:    [[R:%.*]] = or i8 [[SHL]], [[SHR]]
 228 ; CHECK-NEXT:    ret i8 [[R]]
 229 ;
     ; 8 - %y feeds the shl; the 'or' takes (shl, lshr); the checks expect the IR unchanged.
 230   %sub = sub i8 8, %y
 231   %shl = shl i8 %x, %sub
 232   %shr = lshr i8 %x, %y
 233   %r = or i8 %shl, %shr
 234   ret i8 %r
 235 }
 236
 237 ; Vector types should follow the same rules.
 238
 239 define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %y) {
 240 ; CHECK-LABEL: @rotl_v4i32(
 241 ; CHECK-NEXT:    [[SUB:%.*]] = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, [[Y:%.*]]
 242 ; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y]]
 243 ; CHECK-NEXT:    [[SHR:%.*]] = lshr <4 x i32> [[X]], [[SUB]]
 244 ; CHECK-NEXT:    [[R:%.*]] = or <4 x i32> [[SHL]], [[SHR]]
 245 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 246 ;
     ; Vector form: splat 32 minus %y feeds the lshr; the checks expect no funnel shift.
 247   %sub = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y
 248   %shl = shl <4 x i32> %x, %y
 249   %shr = lshr <4 x i32> %x, %sub
 250   %r = or <4 x i32> %shl, %shr
 251   ret <4 x i32> %r
 252 }
 253
 254 ; Non-power-of-2 vector types should follow the same rules.
 255
 256 define <3 x i42> @rotr_v3i42(<3 x i42> %x, <3 x i42> %y) {
 257 ; CHECK-LABEL: @rotr_v3i42(
 258 ; CHECK-NEXT:    [[SUB:%.*]] = sub <3 x i42> <i42 42, i42 42, i42 42>, [[Y:%.*]]
 259 ; CHECK-NEXT:    [[SHL:%.*]] = shl <3 x i42> [[X:%.*]], [[SUB]]
 260 ; CHECK-NEXT:    [[SHR:%.*]] = lshr <3 x i42> [[X]], [[Y]]
 261 ; CHECK-NEXT:    [[R:%.*]] = or <3 x i42> [[SHR]], [[SHL]]
 262 ; CHECK-NEXT:    ret <3 x i42> [[R]]
 263 ;
     ; <3 x i42> form: splat 42 minus %y feeds the shl; the checks expect no funnel shift.
 264   %sub = sub <3 x i42> <i42 42, i42 42, i42 42>, %y
 265   %shl = shl <3 x i42> %x, %sub
 266   %shr = lshr <3 x i42> %x, %y
 267   %r = or <3 x i42> %shr, %shl
 268   ret <3 x i42> %r
 269 }
 270
 271 ; This is the canonical pattern for a UB-safe rotate-by-variable with power-of-2-size scalar type.
 272 ; The backend expansion of funnel shift for targets that don't have a rotate instruction should
 273 ; match the original IR, so it is always good to canonicalize to the intrinsics for this pattern.
 274
 275 define i32 @rotl_safe_i32(i32 %x, i32 %y) {
 276 ; CHECK-LABEL: @rotl_safe_i32(
 277 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
 278 ; CHECK-NEXT:    ret i32 [[R]]
 279 ;
     ; Both amounts are masked with 31: %y for the shl and (0 - %y) for the lshr.
 280   %negy = sub i32 0, %y
 281   %ymask = and i32 %y, 31
 282   %negymask = and i32 %negy, 31
 283   %shl = shl i32 %x, %ymask
 284   %shr = lshr i32 %x, %negymask
 285   %r = or i32 %shr, %shl
 286   ret i32 %r
 287 }
 288
 289 ; An extra use of the masked, negated shift amount does not prevent the fold.
 290
 291 define i16 @rotl_safe_i16_commute_extra_use(i16 %x, i16 %y, i16* %p) {
 292 ; CHECK-LABEL: @rotl_safe_i16_commute_extra_use(
 293 ; CHECK-NEXT:    [[NEGY:%.*]] = sub i16 0, [[Y:%.*]]
 294 ; CHECK-NEXT:    [[NEGYMASK:%.*]] = and i16 [[NEGY]], 15
 295 ; CHECK-NEXT:    store i16 [[NEGYMASK]], i16* [[P:%.*]], align 2
 296 ; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[Y]])
 297 ; CHECK-NEXT:    ret i16 [[R]]
 298 ;
     ; %negymask is also stored to %p, so the sub/and are expected to remain; the 'or' still becomes fshl.
 299   %negy = sub i16 0, %y
 300   %ymask = and i16 %y, 15
 301   %negymask = and i16 %negy, 15
 302   store i16 %negymask, i16* %p
 303   %shl = shl i16 %x, %ymask
 304   %shr = lshr i16 %x, %negymask
 305   %r = or i16 %shl, %shr
 306   ret i16 %r
 307 }
 308
 309 ; Left/right is determined by the negation.
 310
 311 define i64 @rotr_safe_i64(i64 %x, i64 %y) {
 312 ; CHECK-LABEL: @rotr_safe_i64(
 313 ; CHECK-NEXT:    [[R:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Y:%.*]])
 314 ; CHECK-NEXT:    ret i64 [[R]]
 315 ;
     ; The negated, masked amount feeds the shl and the plain masked amount feeds the lshr; fshr is expected.
 316   %negy = sub i64 0, %y
 317   %ymask = and i64 %y, 63
 318   %negymask = and i64 %negy, 63
 319   %shl = shl i64 %x, %negymask
 320   %shr = lshr i64 %x, %ymask
 321   %r = or i64 %shr, %shl
 322   ret i64 %r
 323 }
 324
 325 ; Negative test - an extra use of one of the shifts prevents the fold.
 326
 327 define i8 @rotr_safe_i8_commute_extra_use(i8 %x, i8 %y, i8* %p) {
 328 ; CHECK-LABEL: @rotr_safe_i8_commute_extra_use(
 329 ; CHECK-NEXT:    [[NEGY:%.*]] = sub i8 0, [[Y:%.*]]
 330 ; CHECK-NEXT:    [[YMASK:%.*]] = and i8 [[Y]], 7
 331 ; CHECK-NEXT:    [[NEGYMASK:%.*]] = and i8 [[NEGY]], 7
 332 ; CHECK-NEXT:    [[SHL:%.*]] = shl i8 [[X:%.*]], [[NEGYMASK]]
 333 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i8 [[X]], [[YMASK]]
 334 ; CHECK-NEXT:    store i8 [[SHR]], i8* [[P:%.*]], align 1
 335 ; CHECK-NEXT:    [[R:%.*]] = or i8 [[SHL]], [[SHR]]
 336 ; CHECK-NEXT:    ret i8 [[R]]
 337 ;
     ; %shr is also stored to %p; the checks expect the shl/lshr/or sequence to remain.
 338   %negy = sub i8 0, %y
 339   %ymask = and i8 %y, 7
 340   %negymask = and i8 %negy, 7
 341   %shl = shl i8 %x, %negymask
 342   %shr = lshr i8 %x, %ymask
 343   store i8 %shr, i8* %p
 344   %r = or i8 %shl, %shr
 345   ret i8 %r
 346 }
 347
 348 ; Vectors follow the same rules.
 349
 350 define <2 x i32> @rotl_safe_v2i32(<2 x i32> %x, <2 x i32> %y) {
 351 ; CHECK-LABEL: @rotl_safe_v2i32(
 352 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> [[Y:%.*]])
 353 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 354 ;
     ; Vector form: splat mask 31 on %y (shl amount) and on 0 - %y (lshr amount).
 355   %negy = sub <2 x i32> zeroinitializer, %y
 356   %ymask = and <2 x i32> %y, <i32 31, i32 31>
 357   %negymask = and <2 x i32> %negy, <i32 31, i32 31>
 358   %shl = shl <2 x i32> %x, %ymask
 359   %shr = lshr <2 x i32> %x, %negymask
 360   %r = or <2 x i32> %shr, %shl
 361   ret <2 x i32> %r
 362 }
 363
 364 ; Vectors follow the same rules.
 365
 366 define <3 x i16> @rotr_safe_v3i16(<3 x i16> %x, <3 x i16> %y) {
 367 ; CHECK-LABEL: @rotr_safe_v3i16(
 368 ; CHECK-NEXT:    [[R:%.*]] = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> [[X:%.*]], <3 x i16> [[X]], <3 x i16> [[Y:%.*]])
 369 ; CHECK-NEXT:    ret <3 x i16> [[R]]
 370 ;
     ; <3 x i16> form with splat mask 15; the negated amount feeds the shl, so fshr is expected.
 371   %negy = sub <3 x i16> zeroinitializer, %y
 372   %ymask = and <3 x i16> %y, <i16 15, i16 15, i16 15>
 373   %negymask = and <3 x i16> %negy, <i16 15, i16 15, i16 15>
 374   %shl = shl <3 x i16> %x, %negymask
 375   %shr = lshr <3 x i16> %x, %ymask
 376   %r = or <3 x i16> %shr, %shl
 377   ret <3 x i16> %r
 378 }
 379
 380 ; These are optionally UB-free rotate left/right patterns that are narrowed to a smaller bitwidth.
 381 ; See PR34046, PR16726, and PR39624 for motivating examples:
 382 ; https://bugs.llvm.org/show_bug.cgi?id=34046
 383 ; https://bugs.llvm.org/show_bug.cgi?id=16726
 384 ; https://bugs.llvm.org/show_bug.cgi?id=39624
 385
 386 define i16 @rotate_left_16bit(i16 %v, i32 %shift) {
 387 ; CHECK-LABEL: @rotate_left_16bit(
 388 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i16
 389 ; CHECK-NEXT:    [[CONV2:%.*]] = call i16 @llvm.fshl.i16(i16 [[V:%.*]], i16 [[V]], i16 [[TMP1]])
 390 ; CHECK-NEXT:    ret i16 [[CONV2]]
 391 ;
     ; %v is zero-extended to i32, shifted by (%shift & 15) and 16 - (%shift & 15), then truncated to i16.
 392   %and = and i32 %shift, 15
 393   %conv = zext i16 %v to i32
 394   %shl = shl i32 %conv, %and
 395   %sub = sub i32 16, %and
 396   %shr = lshr i32 %conv, %sub
 397   %or = or i32 %shr, %shl
 398   %conv2 = trunc i32 %or to i16
 399   ret i16 %conv2
 400 }
 401
 402 ; Commute the 'or' operands and try a vector type.
 403
 404 define <2 x i16> @rotate_left_commute_16bit_vec(<2 x i16> %v, <2 x i32> %shift) {
 405 ; CHECK-LABEL: @rotate_left_commute_16bit_vec(
 406 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i32> [[SHIFT:%.*]] to <2 x i16>
 407 ; CHECK-NEXT:    [[CONV2:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[V:%.*]], <2 x i16> [[V]], <2 x i16> [[TMP1]])
 408 ; CHECK-NEXT:    ret <2 x i16> [[CONV2]]
 409 ;
     ; <2 x i16> value widened to <2 x i32>; the 'or' operands are in (shl, lshr) order.
 410   %and = and <2 x i32> %shift, <i32 15, i32 15>
 411   %conv = zext <2 x i16> %v to <2 x i32>
 412   %shl = shl <2 x i32> %conv, %and
 413   %sub = sub <2 x i32> <i32 16, i32 16>, %and
 414   %shr = lshr <2 x i32> %conv, %sub
 415   %or = or <2 x i32> %shl, %shr
 416   %conv2 = trunc <2 x i32> %or to <2 x i16>
 417   ret <2 x i16> %conv2
 418 }
 419
 420 ; Change the size, rotation direction (the subtract is on the left-shift), and mask op.
 421
 422 define i8 @rotate_right_8bit(i8 %v, i3 %shift) {
 423 ; CHECK-LABEL: @rotate_right_8bit(
 424 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i3 [[SHIFT:%.*]] to i8
 425 ; CHECK-NEXT:    [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
 426 ; CHECK-NEXT:    ret i8 [[CONV2]]
 427 ;
     ; The amount is an i3 zero-extended to i32, so it is at most 7 without an 'and'; 8 - amount feeds the shl.
 428   %and = zext i3 %shift to i32
 429   %conv = zext i8 %v to i32
 430   %shr = lshr i32 %conv, %and
 431   %sub = sub i32 8, %and
 432   %shl = shl i32 %conv, %sub
 433   %or = or i32 %shl, %shr
 434   %conv2 = trunc i32 %or to i8
 435   ret i8 %conv2
 436 }
 437
 438 ; The right-shifted value does not need to be a zexted value; here it is masked.
 439 ; The shift mask could be less than the bitwidth, but this is still ok.
 440
 441 define i8 @rotate_right_commute_8bit_unmasked_shl(i32 %v, i32 %shift) {
 442 ; CHECK-LABEL: @rotate_right_commute_8bit_unmasked_shl(
 443 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i8
 444 ; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 3
 445 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[V:%.*]] to i8
 446 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[V]] to i8
 447 ; CHECK-NEXT:    [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP3]], i8 [[TMP4]], i8 [[TMP2]])
 448 ; CHECK-NEXT:    ret i8 [[CONV2]]
 449 ;
     ; %v is narrowed with 'and 255' instead of a zext, and both shifts operate on that masked value.
     ; The shift amount is masked with 3, and the expected output keeps that mask in i8.
 450   %and = and i32 %shift, 3
 451   %conv = and i32 %v, 255
 452   %shr = lshr i32 %conv, %and
 453   %sub = sub i32 8, %and
 454   %shl = shl i32 %conv, %sub
 455   %or = or i32 %shr, %shl
 456   %conv2 = trunc i32 %or to i8
 457   ret i8 %conv2
 458 }
 459
 460 ; The left-shifted value does not need to be masked at all.
 461
 462 define i8 @rotate_right_commute_8bit(i32 %v, i32 %shift) {
 463 ; CHECK-LABEL: @rotate_right_commute_8bit(
 464 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i8
 465 ; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 3
 466 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[V:%.*]] to i8
 467 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[V]] to i8
 468 ; CHECK-NEXT:    [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP3]], i8 [[TMP4]], i8 [[TMP2]])
 469 ; CHECK-NEXT:    ret i8 [[CONV2]]
 470 ;
     ; The lshr operand is %v & 255, but the shl operand is the unmasked %v.
 471   %and = and i32 %shift, 3
 472   %conv = and i32 %v, 255
 473   %shr = lshr i32 %conv, %and
 474   %sub = sub i32 8, %and
 475   %shl = shl i32 %v, %sub
 476   %or = or i32 %shr, %shl
 477   %conv2 = trunc i32 %or to i8
 478   ret i8 %conv2
 479 }
 480
 481 ; If the original source does not mask the shift amount,
 482 ; we still do the transform; the funnel shift amount is taken modulo the bitwidth.
 483
 484 define i8 @rotate8_not_safe(i8 %v, i32 %shamt) {
 485 ; CHECK-LABEL: @rotate8_not_safe(
 486 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i8
 487 ; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.fshl.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
 488 ; CHECK-NEXT:    ret i8 [[RET]]
 489 ;
     ; %shamt is used directly (no 'and'); the expected output truncates it to i8 and passes it to fshl.
 490   %conv = zext i8 %v to i32
 491   %sub = sub i32 8, %shamt
 492   %shr = lshr i32 %conv, %sub
 493   %shl = shl i32 %conv, %shamt
 494   %or = or i32 %shr, %shl
 495   %ret = trunc i32 %or to i8
 496   ret i8 %ret
 497 }
 498
 499 ; A non-power-of-2 destination type can't be masked as above.
 500
 501 define i9 @rotate9_not_safe(i9 %v, i32 %shamt) {
 502 ; CHECK-LABEL: @rotate9_not_safe(
 503 ; CHECK-NEXT:    [[CONV:%.*]] = zext i9 [[V:%.*]] to i32
 504 ; CHECK-NEXT:    [[SUB:%.*]] = sub i32 9, [[SHAMT:%.*]]
 505 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONV]], [[SUB]]
 506 ; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[CONV]], [[SHAMT]]
 507 ; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
 508 ; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[OR]] to i9
 509 ; CHECK-NEXT:    ret i9 [[RET]]
 510 ;
     ; i9 source with unmasked amounts %shamt and 9 - %shamt; the checks expect the IR unchanged.
 511   %conv = zext i9 %v to i32
 512   %sub = sub i32 9, %shamt
 513   %shr = lshr i32 %conv, %sub
 514   %shl = shl i32 %conv, %shamt
 515   %or = or i32 %shr, %shl
 516   %ret = trunc i32 %or to i9
 517   ret i9 %ret
 518 }
 519
 520 ; We should narrow (v << (s & 15)) | (v >> (-s & 15))
 521 ; when both v and s have been promoted.
 522
 523 define i16 @rotateleft_16_neg_mask(i16 %v, i16 %shamt) {
 524 ; CHECK-LABEL: @rotateleft_16_neg_mask(
 525 ; CHECK-NEXT:    [[OR:%.*]] = call i16 @llvm.fshl.i16(i16 [[V:%.*]], i16 [[V]], i16 [[SHAMT:%.*]])
 526 ; CHECK-NEXT:    ret i16 [[OR]]
 527 ;
     ; The amounts are masked in i16 (%shamt & 15 and (0 - %shamt) & 15) and then zero-extended to i32.
 528   %neg = sub i16 0, %shamt
 529   %lshamt = and i16 %shamt, 15
 530   %lshamtconv = zext i16 %lshamt to i32
 531   %rshamt = and i16 %neg, 15
 532   %rshamtconv = zext i16 %rshamt to i32
 533   %conv = zext i16 %v to i32
 534   %shl = shl i32 %conv, %lshamtconv
 535   %shr = lshr i32 %conv, %rshamtconv
 536   %or = or i32 %shr, %shl
 537   %ret = trunc i32 %or to i16
 538   ret i16 %ret
 539 }
 540
 541 define i16 @rotateleft_16_neg_mask_commute(i16 %v, i16 %shamt) {
 542 ; CHECK-LABEL: @rotateleft_16_neg_mask_commute(
 543 ; CHECK-NEXT:    [[OR:%.*]] = call i16 @llvm.fshl.i16(i16 [[V:%.*]], i16 [[V]], i16 [[SHAMT:%.*]])
 544 ; CHECK-NEXT:    ret i16 [[OR]]
 545 ;
     ; Masked i16 amounts are zero-extended to i32; the 'or' operands are in (shl, lshr) order.
 546   %neg = sub i16 0, %shamt
 547   %lshamt = and i16 %shamt, 15
 548   %lshamtconv = zext i16 %lshamt to i32
 549   %rshamt = and i16 %neg, 15
 550   %rshamtconv = zext i16 %rshamt to i32
 551   %conv = zext i16 %v to i32
 552   %shl = shl i32 %conv, %lshamtconv
 553   %shr = lshr i32 %conv, %rshamtconv
 554   %or = or i32 %shl, %shr
 555   %ret = trunc i32 %or to i16
 556   ret i16 %ret
 557 }
 558
 559 define i8 @rotateright_8_neg_mask(i8 %v, i8 %shamt) {
 560 ; CHECK-LABEL: @rotateright_8_neg_mask(
 561 ; CHECK-NEXT:    [[OR:%.*]] = call i8 @llvm.fshr.i8(i8 [[V:%.*]], i8 [[V]], i8 [[SHAMT:%.*]])
 562 ; CHECK-NEXT:    ret i8 [[OR]]
 563 ;
     ; The negated masked amount feeds the shl and the plain masked amount feeds the lshr; fshr is expected.
 564   %neg = sub i8 0, %shamt
 565   %rshamt = and i8 %shamt, 7
 566   %rshamtconv = zext i8 %rshamt to i32
 567   %lshamt = and i8 %neg, 7
 568   %lshamtconv = zext i8 %lshamt to i32
 569   %conv = zext i8 %v to i32
 570   %shl = shl i32 %conv, %lshamtconv
 571   %shr = lshr i32 %conv, %rshamtconv
 572   %or = or i32 %shr, %shl
 573   %ret = trunc i32 %or to i8
 574   ret i8 %ret
 575 }
 576
 577 define i8 @rotateright_8_neg_mask_commute(i8 %v, i8 %shamt) {
 578 ; CHECK-LABEL: @rotateright_8_neg_mask_commute(
 579 ; CHECK-NEXT:    [[OR:%.*]] = call i8 @llvm.fshr.i8(i8 [[V:%.*]], i8 [[V]], i8 [[SHAMT:%.*]])
 580 ; CHECK-NEXT:    ret i8 [[OR]]
 581 ;
     ; i8 amounts masked with 7; the negated amount feeds the shl; the 'or' operands are in (shl, lshr) order.
 582   %neg = sub i8 0, %shamt
 583   %rshamt = and i8 %shamt, 7
 584   %rshamtconv = zext i8 %rshamt to i32
 585   %lshamt = and i8 %neg, 7
 586   %lshamtconv = zext i8 %lshamt to i32
 587   %conv = zext i8 %v to i32
 588   %shl = shl i32 %conv, %lshamtconv
 589   %shr = lshr i32 %conv, %rshamtconv
 590   %or = or i32 %shl, %shr
 591   %ret = trunc i32 %or to i8
 592   ret i8 %ret
 593 }
 594
 595 ; The shift amount may already be in the wide type,
 596 ; so we need to truncate it going into the rotate pattern.
 597
 598 define i16 @rotateright_16_neg_mask_wide_amount(i16 %v, i32 %shamt) {
 599 ; CHECK-LABEL: @rotateright_16_neg_mask_wide_amount(
 600 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i16
 601 ; CHECK-NEXT:    [[RET:%.*]] = call i16 @llvm.fshr.i16(i16 [[V:%.*]], i16 [[V]], i16 [[TMP1]])
 602 ; CHECK-NEXT:    ret i16 [[RET]]
 603 ;
     ; %shamt is already i32: it is negated and masked with 15 in the wide type; the expected output truncates it to i16.
 604   %neg = sub i32 0, %shamt
 605   %rshamt = and i32 %shamt, 15
 606   %lshamt = and i32 %neg, 15
 607   %conv = zext i16 %v to i32
 608   %shl = shl i32 %conv, %lshamt
 609   %shr = lshr i32 %conv, %rshamt
 610   %or = or i32 %shr, %shl
 611   %ret = trunc i32 %or to i16
 612   ret i16 %ret
 613 }
 614
 615 define i16 @rotateright_16_neg_mask_wide_amount_commute(i16 %v, i32 %shamt) {
 616 ; CHECK-LABEL: @rotateright_16_neg_mask_wide_amount_commute(
 617 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i16
 618 ; CHECK-NEXT:    [[RET:%.*]] = call i16 @llvm.fshr.i16(i16 [[V:%.*]], i16 [[V]], i16 [[TMP1]])
 619 ; CHECK-NEXT:    ret i16 [[RET]]
 620 ;
     ; i32 amounts masked with 15; the negated amount feeds the shl; the 'or' operands are in (shl, lshr) order.
 621   %neg = sub i32 0, %shamt
 622   %rshamt = and i32 %shamt, 15
 623   %lshamt = and i32 %neg, 15
 624   %conv = zext i16 %v to i32
 625   %shl = shl i32 %conv, %lshamt
 626   %shr = lshr i32 %conv, %rshamt
 627   %or = or i32 %shl, %shr
 628   %ret = trunc i32 %or to i16
 629   ret i16 %ret
 630 }
 631
 632 define i64 @rotateright_64_zext_neg_mask_amount(i64 %0, i32 %1) {
 633 ; CHECK-LABEL: @rotateright_64_zext_neg_mask_amount(
 634 ; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP1:%.*]] to i64
 635 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP3]])
 636 ; CHECK-NEXT:    ret i64 [[TMP4]]
 637 ;
     ; The i32 amount and its negation are each masked with 63 and then zero-extended to i64;
     ; the plain amount feeds the lshr and the negated one feeds the shl.
 638   %3 = and i32 %1, 63
 639   %4 = zext i32 %3 to i64
 640   %5 = lshr i64 %0, %4
 641   %6 = sub nsw i32 0, %1
 642   %7 = and i32 %6, 63
 643   %8 = zext i32 %7 to i64
 644   %9 = shl i64 %0, %8
 645   %10 = or i64 %5, %9
 646   ret i64 %10
 647 }
 648
 649 define i8 @rotateleft_8_neg_mask_wide_amount(i8 %v, i32 %shamt) {
 650 ; CHECK-LABEL: @rotateleft_8_neg_mask_wide_amount(
 651 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i8
 652 ; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.fshl.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
 653 ; CHECK-NEXT:    ret i8 [[RET]]
 654 ;
     ; i32 amounts masked with 7; the plain amount feeds the shl and the negated one the lshr, so fshl is expected.
 655   %neg = sub i32 0, %shamt
 656   %lshamt = and i32 %shamt, 7
 657   %rshamt = and i32 %neg, 7
 658   %conv = zext i8 %v to i32
 659   %shl = shl i32 %conv, %lshamt
 660   %shr = lshr i32 %conv, %rshamt
 661   %or = or i32 %shr, %shl
 662   %ret = trunc i32 %or to i8
 663   ret i8 %ret
 664 }
 665
 666 define i8 @rotateleft_8_neg_mask_wide_amount_commute(i8 %v, i32 %shamt) {
 667 ; CHECK-LABEL: @rotateleft_8_neg_mask_wide_amount_commute(
 668 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i8
 669 ; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.fshl.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
 670 ; CHECK-NEXT:    ret i8 [[RET]]
 671 ;
     ; i32 amounts masked with 7; the plain amount feeds the shl; the 'or' operands are in (shl, lshr) order.
 672   %neg = sub i32 0, %shamt
 673   %lshamt = and i32 %shamt, 7
 674   %rshamt = and i32 %neg, 7
 675   %conv = zext i8 %v to i32
 676   %shl = shl i32 %conv, %lshamt
 677   %shr = lshr i32 %conv, %rshamt
 678   %or = or i32 %shl, %shr
 679   %ret = trunc i32 %or to i8
 680   ret i8 %ret
 681 }
 682
 683 define i64 @rotateleft_64_zext_neg_mask_amount(i64 %0, i32 %1) {
 684 ; CHECK-LABEL: @rotateleft_64_zext_neg_mask_amount(
 685 ; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP1:%.*]] to i64
 686 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.fshl.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP3]])
 687 ; CHECK-NEXT:    ret i64 [[TMP4]]
 688 ;
     ; The i32 amount and its negation are each masked with 63 and then zero-extended to i64;
     ; the plain amount feeds the shl and the negated one feeds the lshr.
 689   %3 = and i32 %1, 63
 690   %4 = zext i32 %3 to i64
 691   %5 = shl i64 %0, %4
 692   %6 = sub nsw i32 0, %1
 693   %7 = and i32 %6, 63
 694   %8 = zext i32 %7 to i64
 695   %9 = lshr i64 %0, %8
 696   %10 = or i64 %5, %9
 697   ret i64 %10
 698 }
 699
 700 ; Non-power-of-2 types. This could be transformed, but it's not a typical rotate pattern.
 701
 702 define i9 @rotateleft_9_neg_mask_wide_amount_commute(i9 %v, i33 %shamt) {
 703 ; CHECK-LABEL: @rotateleft_9_neg_mask_wide_amount_commute(
 704 ; CHECK-NEXT:    [[NEG:%.*]] = sub i33 0, [[SHAMT:%.*]]
 705 ; CHECK-NEXT:    [[LSHAMT:%.*]] = and i33 [[SHAMT]], 8
 706 ; CHECK-NEXT:    [[RSHAMT:%.*]] = and i33 [[NEG]], 8
 707 ; CHECK-NEXT:    [[CONV:%.*]] = zext i9 [[V:%.*]] to i33
 708 ; CHECK-NEXT:    [[SHL:%.*]] = shl i33 [[CONV]], [[LSHAMT]]
 709 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i33 [[CONV]], [[RSHAMT]]
 710 ; CHECK-NEXT:    [[OR:%.*]] = or i33 [[SHL]], [[SHR]]
 711 ; CHECK-NEXT:    [[RET:%.*]] = trunc i33 [[OR]] to i9
 712 ; CHECK-NEXT:    ret i9 [[RET]]
 713 ;
     ; i9 value widened to i33; both amounts are masked with 8; the checks expect the IR unchanged.
 714   %neg = sub i33 0, %shamt
 715   %lshamt = and i33 %shamt, 8
 716   %rshamt = and i33 %neg, 8
 717   %conv = zext i9 %v to i33
 718   %shl = shl i33 %conv, %lshamt
 719   %shr = lshr i33 %conv, %rshamt
 720   %or = or i33 %shl, %shr
 721   %ret = trunc i33 %or to i9
 722   ret i9 %ret
 723 }
 724
 725 ; Fold or(shl(v,x),lshr(v,bw-x)) iff x < bw
 726
 727 define i64 @rotl_sub_mask(i64 %0, i64 %1) {
 728 ; CHECK-LABEL: @rotl_sub_mask(
 729 ; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.fshl.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP1:%.*]])
 730 ; CHECK-NEXT:    ret i64 [[TMP3]]
 731 ;
     ; %1 & 63 is the shl amount and 64 - (%1 & 63) is the lshr amount (the sub carries nuw nsw).
 732   %3 = and i64 %1, 63
 733   %4 = shl i64 %0, %3
 734   %5 = sub nuw nsw i64 64, %3
 735   %6 = lshr i64 %0, %5
 736   %7 = or i64 %6, %4
 737   ret i64 %7
 738 }
 739
 740 ; Fold or(lshr(v,x),shl(v,bw-x)) iff x < bw
 741
 742 define i64 @rotr_sub_mask(i64 %0, i64 %1) {
 743 ; CHECK-LABEL: @rotr_sub_mask(
 744 ; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP1:%.*]])
 745 ; CHECK-NEXT:    ret i64 [[TMP3]]
 746 ;
     ; %1 & 63 is the lshr amount and 64 - (%1 & 63) is the shl amount (the sub carries nuw nsw).
 747   %3 = and i64 %1, 63
 748   %4 = lshr i64 %0, %3
 749   %5 = sub nuw nsw i64 64, %3
 750   %6 = shl i64 %0, %5
 751   %7 = or i64 %6, %4
 752   ret i64 %7
 753 }
 754
 755 define <2 x i64> @rotr_sub_mask_vector(<2 x i64> %0, <2 x i64> %1) {
 756 ; CHECK-LABEL: @rotr_sub_mask_vector(
 757 ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> [[TMP0:%.*]], <2 x i64> [[TMP0]], <2 x i64> [[TMP1:%.*]])
 758 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 759 ;
     ; <2 x i64> form: splat mask 63 gives the lshr amount and splat 64 minus that gives the shl amount.
 760   %3 = and <2 x i64> %1, <i64 63, i64 63>
 761   %4 = lshr <2 x i64> %0, %3
 762   %5 = sub nuw nsw <2 x i64> <i64 64, i64 64>, %3
 763   %6 = shl <2 x i64> %0, %5
 764   %7 = or <2 x i64> %6, %4
 765   ret <2 x i64> %7
 766 }
 767
 768 ; Convert select pattern (guarding a zero shift amount) to a funnel shift intrinsic.
 769
 770 define i32 @rotr_select(i32 %x, i32 %shamt) {
 771 ; CHECK-LABEL: @rotr_select(
 772 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[SHAMT:%.*]])
 773 ; CHECK-NEXT:    ret i32 [[R]]
 774 ;
     ; The select yields %x when %shamt == 0 and the or-of-shifts otherwise; neither shift amount is masked.
 775   %cmp = icmp eq i32 %shamt, 0
 776   %sub = sub i32 32, %shamt
 777   %shr = lshr i32 %x, %shamt
 778   %shl = shl i32 %x, %sub
 779   %or = or i32 %shr, %shl
 780   %r = select i1 %cmp, i32 %x, i32 %or
 781   ret i32 %r
 782 }
 783
 784 ; Convert select pattern (guarding a zero shift amount) to a funnel shift intrinsic.
 785
 786 define i8 @rotr_select_commute(i8 %x, i8 %shamt) {
 787 ; CHECK-LABEL: @rotr_select_commute(
 788 ; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.fshr.i8(i8 [[X:%.*]], i8 [[X]], i8 [[SHAMT:%.*]])
 789 ; CHECK-NEXT:    ret i8 [[R]]
 790 ;
     ; i8 version: 8 - %shamt feeds the shl, and the 'or' operands are in (shl, lshr) order.
 791   %cmp = icmp eq i8 %shamt, 0
 792   %sub = sub i8 8, %shamt
 793   %shr = lshr i8 %x, %shamt
 794   %shl = shl i8 %x, %sub
 795   %or = or i8 %shl, %shr
 796   %r = select i1 %cmp, i8 %x, i8 %or
 797   ret i8 %r
 798 }
 799
 800 ; Convert select pattern (guarding a zero shift amount) to a funnel shift intrinsic.
 801
 802 define i16 @rotl_select(i16 %x, i16 %shamt) {
 803 ; CHECK-LABEL: @rotl_select(
 804 ; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[SHAMT:%.*]])
 805 ; CHECK-NEXT:    ret i16 [[R]]
 806 ;
     ; 16 - %shamt feeds the lshr and %shamt feeds the shl, so fshl is expected.
 807   %cmp = icmp eq i16 %shamt, 0
 808   %sub = sub i16 16, %shamt
 809   %shr = lshr i16 %x, %sub
 810   %shl = shl i16 %x, %shamt
 811   %or = or i16 %shr, %shl
 812   %r = select i1 %cmp, i16 %x, i16 %or
 813   ret i16 %r
 814 }
 815
 816 ; Convert select pattern (guarding a zero shift amount) to a funnel shift intrinsic.
 817
 818 define <2 x i64> @rotl_select_commute(<2 x i64> %x, <2 x i64> %shamt) {
 819 ; CHECK-LABEL: @rotl_select_commute(
 820 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[X:%.*]], <2 x i64> [[X]], <2 x i64> [[SHAMT:%.*]])
 821 ; CHECK-NEXT:    ret <2 x i64> [[R]]
 822 ;
     ; Vector version: the compare against zeroinitializer gives a per-lane <2 x i1> select condition.
 823   %cmp = icmp eq <2 x i64> %shamt, zeroinitializer
 824   %sub = sub <2 x i64> <i64 64, i64 64>, %shamt
 825   %shr = lshr <2 x i64> %x, %sub
 826   %shl = shl <2 x i64> %x, %shamt
 827   %or = or <2 x i64> %shl, %shr
 828   %r = select <2 x i1> %cmp, <2 x i64> %x, <2 x i64> %or
 829   ret <2 x i64> %r
 830 }
 831
 832 ; Negative test - the transform is only valid with power-of-2 types.
 833
 834 define i24 @rotl_select_weird_type(i24 %x, i24 %shamt) { ; same shape as rotl_select_commute but on i24; the expected output keeps every instruction
 835 ; CHECK-LABEL: @rotl_select_weird_type(
 836 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i24 [[SHAMT:%.*]], 0
 837 ; CHECK-NEXT:    [[SUB:%.*]] = sub i24 24, [[SHAMT]]
 838 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i24 [[X:%.*]], [[SUB]]
 839 ; CHECK-NEXT:    [[SHL:%.*]] = shl i24 [[X]], [[SHAMT]]
 840 ; CHECK-NEXT:    [[OR:%.*]] = or i24 [[SHL]], [[SHR]]
 841 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i24 [[X]], i24 [[OR]]
 842 ; CHECK-NEXT:    ret i24 [[R]]
 843 ;
 844   %cmp = icmp eq i24 %shamt, 0 ; guard for a zero rotate amount
 845   %sub = sub i24 24, %shamt ; complementary amount: bit width (24, not a power of 2) minus %shamt
 846   %shr = lshr i24 %x, %sub ; right shift by the complementary amount
 847   %shl = shl i24 %x, %shamt ; left shift by the rotate amount
 848   %or = or i24 %shl, %shr
 849   %r = select i1 %cmp, i24 %x, i24 %or ; expected to remain a select: no funnel shift is formed for this width
 850   ret i24 %r
 851 }
 852
 853 define i32 @rotl_select_zext_shamt(i32 %x, i8 %y) { ; select-guarded rotate-left of i32 whose amount is computed in i8 and zero-extended
 854 ; CHECK-LABEL: @rotl_select_zext_shamt(
 855 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[Y:%.*]] to i32
 856 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[TMP1]])
 857 ; CHECK-NEXT:    ret i32 [[R]]
 858 ;
 859   %rem = and i8 %y, 31 ; narrow amount masked to 0..31; the expected output zero-extends %y directly, without this mask
 860   %cmp = icmp eq i8 %rem, 0 ; guard: when %rem is 0, %sub below equals the bit width (32)
 861   %sh_prom = zext i8 %rem to i32 ; rotate amount widened to the type of %x
 862   %sub = sub nuw nsw i8 32, %rem ; complementary amount, still computed in i8
 863   %sh_prom1 = zext i8 %sub to i32 ; complementary amount widened to the type of %x
 864   %shr = lshr i32 %x, %sh_prom1
 865   %shl = shl i32 %x, %sh_prom
 866   %or = or i32 %shl, %shr
 867   %r = select i1 %cmp, i32 %x, i32 %or ; a zero amount yields %x unchanged
 868   ret i32 %r
 869 }
 870
 871 define i64 @rotr_select_zext_shamt(i64 %x, i32 %y) { ; select-guarded rotate-right of i64 whose amount is computed in i32 and zero-extended
 872 ; CHECK-LABEL: @rotr_select_zext_shamt(
 873 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[Y:%.*]] to i64
 874 ; CHECK-NEXT:    [[R:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[TMP1]])
 875 ; CHECK-NEXT:    ret i64 [[R]]
 876 ;
 877   %rem = and i32 %y, 63 ; narrow amount masked to 0..63; the expected output zero-extends %y directly, without this mask
 878   %cmp = icmp eq i32 %rem, 0 ; guard: when %rem is 0, %sub below equals the bit width (64)
 879   %sh_prom = zext i32 %rem to i64 ; rotate amount widened to the type of %x
 880   %shr = lshr i64 %x, %sh_prom ; right shift by the rotate amount
 881   %sub = sub nuw nsw i32 64, %rem ; complementary amount, still computed in i32
 882   %sh_prom1 = zext i32 %sub to i64 ; complementary amount widened to the type of %x
 883   %shl = shl i64 %x, %sh_prom1 ; left shift by the complementary amount
 884   %or = or i64 %shl, %shr
 885   %r = select i1 %cmp, i64 %x, i64 %or ; a zero amount yields %x unchanged
 886   ret i64 %r
 887 }
 888
 889 ; Test that the transform doesn't crash when there's an "or" with a ConstantExpr operand.
 890
 891 @external_global = external global i8 ; referenced below only through ptrtoint constant expressions
 892
 893 define i32 @rotl_constant_expr(i32 %shamt) { ; 'or' whose second operand is a shl constant expression rather than a shl instruction
 894 ; CHECK-LABEL: @rotl_constant_expr(
 895 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 ptrtoint (i8* @external_global to i32), [[SHAMT:%.*]]
 896 ; CHECK-NEXT:    [[R:%.*]] = or i32 [[SHR]], shl (i32 ptrtoint (i8* @external_global to i32), i32 11)
 897 ; CHECK-NEXT:    ret i32 [[R]]
 898 ;
 899   %shr = lshr i32 ptrtoint (i8* @external_global to i32), %shamt ; variable right shift of the global's address
 900   %r = or i32 %shr, shl (i32 ptrtoint (i8* @external_global to i32), i32 11) ; constant-expression shl by 11; expected output leaves both operations as written
 901   ret i32 %r
 902 }
 903
 904 ; PR20750 - https://bugs.llvm.org/show_bug.cgi?id=20750
 905 ; This IR corresponds to C source where the shift amount is a smaller type than the rotated value:
 906 ; unsigned int rotate32_doubleand1(unsigned int v, unsigned char r) { r = r & 31; return (v << r) | (v >> (((32 - r)) & 31)); }
 907
 908 define i32 @rotateleft32_doubleand1(i32 %v, i8 %r) { ; rotate-left of i32 by an i8 amount, with both shift amounts masked to 31
 909 ; CHECK-LABEL: @rotateleft32_doubleand1(
 910 ; CHECK-NEXT:    [[Z:%.*]] = zext i8 [[R:%.*]] to i32
 911 ; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[V:%.*]], i32 [[V]], i32 [[Z]])
 912 ; CHECK-NEXT:    ret i32 [[OR]]
 913 ;
 914   %m = and i8 %r, 31 ; first mask, applied in the narrow type (r = r & 31 in the C source above)
 915   %z = zext i8 %m to i32 ; rotate amount widened to the type of %v
 916   %neg = sub nsw i32 0, %z ; negated amount
 917   %and2 = and i32 %neg, 31 ; second mask: (0 - z) & 31 is the complementary amount
 918   %shl = shl i32 %v, %z ; left shift by the rotate amount
 919   %shr = lshr i32 %v, %and2 ; right shift by the complementary amount
 920   %or = or i32 %shr, %shl ; expected to become fshl with the zero-extended, unmasked %r as the amount
 921   ret i32 %or
 922 }
 923
 924 define i32 @rotateright32_doubleand1(i32 %v, i16 %r) { ; rotate-right counterpart of rotateleft32_doubleand1, with an i16 amount
 925 ; CHECK-LABEL: @rotateright32_doubleand1(
 926 ; CHECK-NEXT:    [[Z:%.*]] = zext i16 [[R:%.*]] to i32
 927 ; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.fshr.i32(i32 [[V:%.*]], i32 [[V]], i32 [[Z]])
 928 ; CHECK-NEXT:    ret i32 [[OR]]
 929 ;
 930   %m = and i16 %r, 31 ; first mask, applied in the narrow type
 931   %z = zext i16 %m to i32 ; rotate amount widened to the type of %v
 932   %neg = sub nsw i32 0, %z ; negated amount
 933   %and2 = and i32 %neg, 31 ; second mask: (0 - z) & 31 is the complementary amount
 934   %shl = shl i32 %v, %and2 ; left shift by the complementary amount
 935   %shr = lshr i32 %v, %z ; right shift by the rotate amount
 936   %or = or i32 %shr, %shl ; expected to become fshr with the zero-extended, unmasked %r as the amount
 937   ret i32 %or
 938 }
 939
 940 ; TODO: This should be a rotate (funnel-shift).
 941
 942 define i8 @unmasked_shlop_unmasked_shift_amount(i32 %x, i32 %shamt) { ; 8-bit rotate-like pattern computed in i32 and truncated; expected output is currently unchanged
 943 ; CHECK-LABEL: @unmasked_shlop_unmasked_shift_amount(
 944 ; CHECK-NEXT:    [[MASKX:%.*]] = and i32 [[X:%.*]], 255
 945 ; CHECK-NEXT:    [[T4:%.*]] = sub i32 8, [[SHAMT:%.*]]
 946 ; CHECK-NEXT:    [[T5:%.*]] = shl i32 [[X]], [[T4]]
 947 ; CHECK-NEXT:    [[T6:%.*]] = lshr i32 [[MASKX]], [[SHAMT]]
 948 ; CHECK-NEXT:    [[T7:%.*]] = or i32 [[T5]], [[T6]]
 949 ; CHECK-NEXT:    [[T8:%.*]] = trunc i32 [[T7]] to i8
 950 ; CHECK-NEXT:    ret i8 [[T8]]
 951 ;
 952   %maskx = and i32 %x, 255 ; low 8 bits of %x; only the lshr operand uses this masked value
 953   %t4 = sub i32 8, %shamt ; complementary amount relative to the narrow width (8); %shamt itself is not masked
 954   %t5 = shl i32 %x, %t4 ; left shift uses the unmasked %x
 955   %t6 = lshr i32 %maskx, %shamt ; right shift uses the masked value
 956   %t7 = or i32 %t5, %t6
 957   %t8 = trunc i32 %t7 to i8 ; only the low 8 bits of the combined value are returned
 958   ret i8 %t8
 959 }