test/Transforms/InstCombine/rotate.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -instcombine -S | FileCheck %s
   3
   4 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
   5
   6 ; TODO: Canonicalize rotate by constant to funnel shift intrinsics.
   7 ; This should help cost modeling for vectorization, inlining, etc.
   8 ; If a target does not have a rotate instruction, the expansion will
   9 ; be exactly these same 3 basic ops (shl/lshr/or).
  10
  11 define i32 @rotl_i32_constant(i32 %x) {
     ; Rotate-left of i32 by the constant 11, spelled as shl 11 / lshr 21 / or.
     ; The CHECK lines expect the three instructions to be left as they are.
  12 ; CHECK-LABEL: @rotl_i32_constant(
  13 ; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], 11
  14 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X]], 21
  15 ; CHECK-NEXT:    [[R:%.*]] = or i32 [[SHR]], [[SHL]]
  16 ; CHECK-NEXT:    ret i32 [[R]]
  17 ;
  18   %shl = shl i32 %x, 11
  19   %shr = lshr i32 %x, 21
  20   %r = or i32 %shr, %shl
  21   ret i32 %r
  22 }
  23
  24 define i42 @rotr_i42_constant(i42 %x) {
     ; Rotate-right of a non-power-of-2 type (i42) by the constant 11:
     ; shl 31 / lshr 11 / or. The CHECK lines expect the IR to be unchanged.
  25 ; CHECK-LABEL: @rotr_i42_constant(
  26 ; CHECK-NEXT:    [[SHL:%.*]] = shl i42 [[X:%.*]], 31
  27 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i42 [[X]], 11
  28 ; CHECK-NEXT:    [[R:%.*]] = or i42 [[SHR]], [[SHL]]
  29 ; CHECK-NEXT:    ret i42 [[R]]
  30 ;
  31   %shl = shl i42 %x, 31
  32   %shr = lshr i42 %x, 11
  33   %r = or i42 %shr, %shl
  34   ret i42 %r
  35 }
  36
  37 define i8 @rotr_i8_constant_commute(i8 %x) {
     ; Constant rotate on i8 (shl 5 / lshr 3) with the 'or' operands in
     ; shl-then-lshr order. The CHECK lines expect the IR to be unchanged.
  38 ; CHECK-LABEL: @rotr_i8_constant_commute(
  39 ; CHECK-NEXT:    [[SHL:%.*]] = shl i8 [[X:%.*]], 5
  40 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i8 [[X]], 3
  41 ; CHECK-NEXT:    [[R:%.*]] = or i8 [[SHL]], [[SHR]]
  42 ; CHECK-NEXT:    ret i8 [[R]]
  43 ;
  44   %shl = shl i8 %x, 5
  45   %shr = lshr i8 %x, 3
  46   %r = or i8 %shl, %shr
  47   ret i8 %r
  48 }
  49
  50 define i88 @rotl_i88_constant_commute(i88 %x) {
     ; Constant rotate on i88 where both shift amounts are 44 (half the width),
     ; with the shl as the first 'or' operand. Expected output: unchanged IR.
  51 ; CHECK-LABEL: @rotl_i88_constant_commute(
  52 ; CHECK-NEXT:    [[SHL:%.*]] = shl i88 [[X:%.*]], 44
  53 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i88 [[X]], 44
  54 ; CHECK-NEXT:    [[R:%.*]] = or i88 [[SHL]], [[SHR]]
  55 ; CHECK-NEXT:    ret i88 [[R]]
  56 ;
  57   %shl = shl i88 %x, 44
  58   %shr = lshr i88 %x, 44
  59   %r = or i88 %shl, %shr
  60   ret i88 %r
  61 }
  62
  63 ; Vector types are allowed.
     ; The splat shift amounts 1 and 15 add up to the i16 element width.
     ; The CHECK lines expect the shl/lshr/or sequence to be unchanged.
  64
  65 define <2 x i16> @rotl_v2i16_constant_splat(<2 x i16> %x) {
  66 ; CHECK-LABEL: @rotl_v2i16_constant_splat(
  67 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], <i16 1, i16 1>
  68 ; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i16> [[X]], <i16 15, i16 15>
  69 ; CHECK-NEXT:    [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]]
  70 ; CHECK-NEXT:    ret <2 x i16> [[R]]
  71 ;
  72   %shl = shl <2 x i16> %x, <i16 1, i16 1>
  73   %shr = lshr <2 x i16> %x, <i16 15, i16 15>
  74   %r = or <2 x i16> %shl, %shr
  75   ret <2 x i16> %r
  76 }
  77
  78 ; Non-power-of-2 vector types are allowed.
     ; The splat shift amounts 12 and 5 add up to the i17 element width.
     ; The CHECK lines expect the shl/lshr/or sequence to be unchanged.
  79
  80 define <2 x i17> @rotr_v2i17_constant_splat(<2 x i17> %x) {
  81 ; CHECK-LABEL: @rotr_v2i17_constant_splat(
  82 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i17> [[X:%.*]], <i17 12, i17 12>
  83 ; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i17> [[X]], <i17 5, i17 5>
  84 ; CHECK-NEXT:    [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]]
  85 ; CHECK-NEXT:    ret <2 x i17> [[R]]
  86 ;
  87   %shl = shl <2 x i17> %x, <i17 12, i17 12>
  88   %shr = lshr <2 x i17> %x, <i17 5, i17 5>
  89   %r = or <2 x i17> %shr, %shl
  90   ret <2 x i17> %r
  91 }
  92
  93 ; Allow arbitrary shift constants.
     ; The per-lane amounts differ (17/15 and 19/13), but each shl/lshr pair
     ; adds up to 32. The CHECK lines expect the IR to be unchanged.
  94
  95 define <2 x i32> @rotr_v2i32_constant_nonsplat(<2 x i32> %x) {
  96 ; CHECK-LABEL: @rotr_v2i32_constant_nonsplat(
  97 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> [[X:%.*]], <i32 17, i32 19>
  98 ; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i32> [[X]], <i32 15, i32 13>
  99 ; CHECK-NEXT:    [[R:%.*]] = or <2 x i32> [[SHL]], [[SHR]]
 100 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 101 ;
 102   %shl = shl <2 x i32> %x, <i32 17, i32 19>
 103   %shr = lshr <2 x i32> %x, <i32 15, i32 13>
 104   %r = or <2 x i32> %shl, %shr
 105   ret <2 x i32> %r
 106 }
 107
 108 define <2 x i36> @rotl_v2i16_constant_nonsplat(<2 x i36> %x) {
     ; Non-splat constant rotate on a non-power-of-2 element type: the per-lane
     ; amounts 21/15 and 11/25 each add up to 36. Expected output: unchanged IR.
     ; NOTE(review): the function name says v2i16, but the type is <2 x i36>.
 109 ; CHECK-LABEL: @rotl_v2i16_constant_nonsplat(
 110 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i36> [[X:%.*]], <i36 21, i36 11>
 111 ; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i36> [[X]], <i36 15, i36 25>
 112 ; CHECK-NEXT:    [[R:%.*]] = or <2 x i36> [[SHL]], [[SHR]]
 113 ; CHECK-NEXT:    ret <2 x i36> [[R]]
 114 ;
 115   %shl = shl <2 x i36> %x, <i36 21, i36 11>
 116   %shr = lshr <2 x i36> %x, <i36 15, i36 25>
 117   %r = or <2 x i36> %shl, %shr
 118   ret <2 x i36> %r
 119 }
 120
 121 ; The most basic rotate by variable - no guards for UB due to oversized shifts.
 122 ; This cannot be canonicalized to funnel shift target-independently. The safe
 123 ; expansion includes masking for the shift amount that is not included here,
 124 ; so it could be more expensive.
     ; Here the subtract (32 - %y) feeds the lshr. The CHECK lines expect the
     ; sub/shl/lshr/or sequence to be unchanged.
 125
 126 define i32 @rotl_i32(i32 %x, i32 %y) {
 127 ; CHECK-LABEL: @rotl_i32(
 128 ; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[Y:%.*]]
 129 ; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y]]
 130 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X]], [[SUB]]
 131 ; CHECK-NEXT:    [[R:%.*]] = or i32 [[SHR]], [[SHL]]
 132 ; CHECK-NEXT:    ret i32 [[R]]
 133 ;
 134   %sub = sub i32 32, %y
 135   %shl = shl i32 %x, %y
 136   %shr = lshr i32 %x, %sub
 137   %r = or i32 %shr, %shl
 138   ret i32 %r
 139 }
 140
 141 ; Non-power-of-2 types should follow the same reasoning. Left/right is determined by subtract.
     ; Here the subtract (37 - %y) feeds the shl. The CHECK lines expect the
     ; sub/shl/lshr/or sequence to be unchanged.
 142
 143 define i37 @rotr_i37(i37 %x, i37 %y) {
 144 ; CHECK-LABEL: @rotr_i37(
 145 ; CHECK-NEXT:    [[SUB:%.*]] = sub i37 37, [[Y:%.*]]
 146 ; CHECK-NEXT:    [[SHL:%.*]] = shl i37 [[X:%.*]], [[SUB]]
 147 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i37 [[X]], [[Y]]
 148 ; CHECK-NEXT:    [[R:%.*]] = or i37 [[SHR]], [[SHL]]
 149 ; CHECK-NEXT:    ret i37 [[R]]
 150 ;
 151   %sub = sub i37 37, %y
 152   %shl = shl i37 %x, %sub
 153   %shr = lshr i37 %x, %y
 154   %r = or i37 %shr, %shl
 155   ret i37 %r
 156 }
 157
 158 ; Commute 'or' operands.
     ; Unguarded variable rotate on i8 with the shl as the first 'or' operand.
     ; The CHECK lines expect the sub/shl/lshr/or sequence to be unchanged.
 159
 160 define i8 @rotr_i8_commute(i8 %x, i8 %y) {
 161 ; CHECK-LABEL: @rotr_i8_commute(
 162 ; CHECK-NEXT:    [[SUB:%.*]] = sub i8 8, [[Y:%.*]]
 163 ; CHECK-NEXT:    [[SHL:%.*]] = shl i8 [[X:%.*]], [[SUB]]
 164 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i8 [[X]], [[Y]]
 165 ; CHECK-NEXT:    [[R:%.*]] = or i8 [[SHL]], [[SHR]]
 166 ; CHECK-NEXT:    ret i8 [[R]]
 167 ;
 168   %sub = sub i8 8, %y
 169   %shl = shl i8 %x, %sub
 170   %shr = lshr i8 %x, %y
 171   %r = or i8 %shl, %shr
 172   ret i8 %r
 173 }
 174
 175 ; Vector types should follow the same rules.
     ; Unguarded variable rotate on <4 x i32> using a splat-of-32 subtract.
     ; The CHECK lines expect the sub/shl/lshr/or sequence to be unchanged.
 176
 177 define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %y) {
 178 ; CHECK-LABEL: @rotl_v4i32(
 179 ; CHECK-NEXT:    [[SUB:%.*]] = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, [[Y:%.*]]
 180 ; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y]]
 181 ; CHECK-NEXT:    [[SHR:%.*]] = lshr <4 x i32> [[X]], [[SUB]]
 182 ; CHECK-NEXT:    [[R:%.*]] = or <4 x i32> [[SHL]], [[SHR]]
 183 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 184 ;
 185   %sub = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y
 186   %shl = shl <4 x i32> %x, %y
 187   %shr = lshr <4 x i32> %x, %sub
 188   %r = or <4 x i32> %shl, %shr
 189   ret <4 x i32> %r
 190 }
 191
 192 ; Non-power-of-2 vector types should follow the same rules.
     ; Unguarded variable rotate on <3 x i42>; the subtract feeds the shl.
     ; The CHECK lines expect the sub/shl/lshr/or sequence to be unchanged.
 193
 194 define <3 x i42> @rotr_v3i42(<3 x i42> %x, <3 x i42> %y) {
 195 ; CHECK-LABEL: @rotr_v3i42(
 196 ; CHECK-NEXT:    [[SUB:%.*]] = sub <3 x i42> <i42 42, i42 42, i42 42>, [[Y:%.*]]
 197 ; CHECK-NEXT:    [[SHL:%.*]] = shl <3 x i42> [[X:%.*]], [[SUB]]
 198 ; CHECK-NEXT:    [[SHR:%.*]] = lshr <3 x i42> [[X]], [[Y]]
 199 ; CHECK-NEXT:    [[R:%.*]] = or <3 x i42> [[SHR]], [[SHL]]
 200 ; CHECK-NEXT:    ret <3 x i42> [[R]]
 201 ;
 202   %sub = sub <3 x i42> <i42 42, i42 42, i42 42>, %y
 203   %shl = shl <3 x i42> %x, %sub
 204   %shr = lshr <3 x i42> %x, %y
 205   %r = or <3 x i42> %shr, %shl
 206   ret <3 x i42> %r
 207 }
 208
 209 ; This is the canonical pattern for a UB-safe rotate-by-variable with power-of-2-size scalar type.
 210 ; The backend expansion of funnel shift for targets that don't have a rotate instruction should
 211 ; match the original IR, so it is always good to canonicalize to the intrinsics for this pattern.
     ; Both %y and its negation are masked with 31 before shifting. The CHECK lines
     ; expect a single llvm.fshl.i32 call taking %x twice and the unmasked %y.
 212
 213 define i32 @rotl_safe_i32(i32 %x, i32 %y) {
 214 ; CHECK-LABEL: @rotl_safe_i32(
 215 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
 216 ; CHECK-NEXT:    ret i32 [[R]]
 217 ;
 218   %negy = sub i32 0, %y
 219   %ymask = and i32 %y, 31
 220   %negymask = and i32 %negy, 31
 221   %shl = shl i32 %x, %ymask
 222   %shr = lshr i32 %x, %negymask
 223   %r = or i32 %shr, %shl
 224   ret i32 %r
 225 }
 226
 227 ; Extra uses don't change anything.
     ; The masked negated amount is also stored through %p. The CHECK lines keep the
     ; sub/and feeding that store and still expect the llvm.fshl.i16 call.
 228
 229 define i16 @rotl_safe_i16_commute_extra_use(i16 %x, i16 %y, i16* %p) {
 230 ; CHECK-LABEL: @rotl_safe_i16_commute_extra_use(
 231 ; CHECK-NEXT:    [[NEGY:%.*]] = sub i16 0, [[Y:%.*]]
 232 ; CHECK-NEXT:    [[NEGYMASK:%.*]] = and i16 [[NEGY]], 15
 233 ; CHECK-NEXT:    store i16 [[NEGYMASK]], i16* [[P:%.*]], align 2
 234 ; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[Y]])
 235 ; CHECK-NEXT:    ret i16 [[R]]
 236 ;
 237   %negy = sub i16 0, %y
 238   %ymask = and i16 %y, 15
 239   %negymask = and i16 %negy, 15
 240   store i16 %negymask, i16* %p
 241   %shl = shl i16 %x, %ymask
 242   %shr = lshr i16 %x, %negymask
 243   %r = or i16 %shl, %shr
 244   ret i16 %r
 245 }
 246
 247 ; Left/right is determined by the negation.
     ; Here the masked negated amount feeds the shl and the masked %y feeds the lshr.
     ; The CHECK lines expect a single llvm.fshr.i64 call.
 248
 249 define i64 @rotr_safe_i64(i64 %x, i64 %y) {
 250 ; CHECK-LABEL: @rotr_safe_i64(
 251 ; CHECK-NEXT:    [[R:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Y:%.*]])
 252 ; CHECK-NEXT:    ret i64 [[R]]
 253 ;
 254   %negy = sub i64 0, %y
 255   %ymask = and i64 %y, 63
 256   %negymask = and i64 %negy, 63
 257   %shl = shl i64 %x, %negymask
 258   %shr = lshr i64 %x, %ymask
 259   %r = or i64 %shr, %shl
 260   ret i64 %r
 261 }
 262
 263 ; Extra use of a shift result: the lshr value is also stored through %p.
     ; In this case the CHECK lines expect no funnel-shift call; the original
     ; sub/and/and/shl/lshr/store/or sequence is kept as written.
 264
 265 define i8 @rotr_safe_i8_commute_extra_use(i8 %x, i8 %y, i8* %p) {
 266 ; CHECK-LABEL: @rotr_safe_i8_commute_extra_use(
 267 ; CHECK-NEXT:    [[NEGY:%.*]] = sub i8 0, [[Y:%.*]]
 268 ; CHECK-NEXT:    [[YMASK:%.*]] = and i8 [[Y]], 7
 269 ; CHECK-NEXT:    [[NEGYMASK:%.*]] = and i8 [[NEGY]], 7
 270 ; CHECK-NEXT:    [[SHL:%.*]] = shl i8 [[X:%.*]], [[NEGYMASK]]
 271 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i8 [[X]], [[YMASK]]
 272 ; CHECK-NEXT:    store i8 [[SHR]], i8* [[P:%.*]], align 1
 273 ; CHECK-NEXT:    [[R:%.*]] = or i8 [[SHL]], [[SHR]]
 274 ; CHECK-NEXT:    ret i8 [[R]]
 275 ;
 276   %negy = sub i8 0, %y
 277   %ymask = and i8 %y, 7
 278   %negymask = and i8 %negy, 7
 279   %shl = shl i8 %x, %negymask
 280   %shr = lshr i8 %x, %ymask
 281   store i8 %shr, i8* %p
 282   %r = or i8 %shl, %shr
 283   ret i8 %r
 284 }
 285
 286 ; Vectors follow the same rules.
     ; Masked rotate-left on <2 x i32> with splat masks of 31.
     ; The CHECK lines expect a single llvm.fshl.v2i32 call.
 287
 288 define <2 x i32> @rotl_safe_v2i32(<2 x i32> %x, <2 x i32> %y) {
 289 ; CHECK-LABEL: @rotl_safe_v2i32(
 290 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> [[Y:%.*]])
 291 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 292 ;
 293   %negy = sub <2 x i32> zeroinitializer, %y
 294   %ymask = and <2 x i32> %y, <i32 31, i32 31>
 295   %negymask = and <2 x i32> %negy, <i32 31, i32 31>
 296   %shl = shl <2 x i32> %x, %ymask
 297   %shr = lshr <2 x i32> %x, %negymask
 298   %r = or <2 x i32> %shr, %shl
 299   ret <2 x i32> %r
 300 }
 301
 302 ; Vectors follow the same rules.
     ; Masked rotate-right on a 3-element vector of i16 with splat masks of 15.
     ; The CHECK lines expect a single llvm.fshr.v3i16 call.
 303
 304 define <3 x i16> @rotr_safe_v3i16(<3 x i16> %x, <3 x i16> %y) {
 305 ; CHECK-LABEL: @rotr_safe_v3i16(
 306 ; CHECK-NEXT:    [[R:%.*]] = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> [[X:%.*]], <3 x i16> [[X]], <3 x i16> [[Y:%.*]])
 307 ; CHECK-NEXT:    ret <3 x i16> [[R]]
 308 ;
 309   %negy = sub <3 x i16> zeroinitializer, %y
 310   %ymask = and <3 x i16> %y, <i16 15, i16 15, i16 15>
 311   %negymask = and <3 x i16> %negy, <i16 15, i16 15, i16 15>
 312   %shl = shl <3 x i16> %x, %negymask
 313   %shr = lshr <3 x i16> %x, %ymask
 314   %r = or <3 x i16> %shr, %shl
 315   ret <3 x i16> %r
 316 }
 317
 318 ; These are optionally UB-free rotate left/right patterns that are narrowed to a smaller bitwidth.
 319 ; See PR34046, PR16726, and PR39624 for motivating examples:
 320 ; https://bugs.llvm.org/show_bug.cgi?id=34046
 321 ; https://bugs.llvm.org/show_bug.cgi?id=16726
 322 ; https://bugs.llvm.org/show_bug.cgi?id=39624
     ; Here the i16 value is zero-extended to i32, rotated using an amount masked
     ; with 15, and truncated back to i16. The CHECK lines expect a trunc of the
     ; shift amount feeding a single llvm.fshl.i16 call.
 323
 324 define i16 @rotate_left_16bit(i16 %v, i32 %shift) {
 325 ; CHECK-LABEL: @rotate_left_16bit(
 326 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i16
 327 ; CHECK-NEXT:    [[CONV2:%.*]] = call i16 @llvm.fshl.i16(i16 [[V:%.*]], i16 [[V]], i16 [[TMP1]])
 328 ; CHECK-NEXT:    ret i16 [[CONV2]]
 329 ;
 330   %and = and i32 %shift, 15
 331   %conv = zext i16 %v to i32
 332   %shl = shl i32 %conv, %and
 333   %sub = sub i32 16, %and
 334   %shr = lshr i32 %conv, %sub
 335   %or = or i32 %shr, %shl
 336   %conv2 = trunc i32 %or to i16
 337   ret i16 %conv2
 338 }
 339
 340 ; Commute the 'or' operands and try a vector type.
     ; Same widened rotate-left shape on <2 x i16> values with <2 x i32> amounts.
     ; The CHECK lines expect a vector trunc of the amount and llvm.fshl.v2i16.
 341
 342 define <2 x i16> @rotate_left_commute_16bit_vec(<2 x i16> %v, <2 x i32> %shift) {
 343 ; CHECK-LABEL: @rotate_left_commute_16bit_vec(
 344 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i32> [[SHIFT:%.*]] to <2 x i16>
 345 ; CHECK-NEXT:    [[CONV2:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[V:%.*]], <2 x i16> [[V]], <2 x i16> [[TMP1]])
 346 ; CHECK-NEXT:    ret <2 x i16> [[CONV2]]
 347 ;
 348   %and = and <2 x i32> %shift, <i32 15, i32 15>
 349   %conv = zext <2 x i16> %v to <2 x i32>
 350   %shl = shl <2 x i32> %conv, %and
 351   %sub = sub <2 x i32> <i32 16, i32 16>, %and
 352   %shr = lshr <2 x i32> %conv, %sub
 353   %or = or <2 x i32> %shl, %shr
 354   %conv2 = trunc <2 x i32> %or to <2 x i16>
 355   ret <2 x i16> %conv2
 356 }
 357
 358 ; Change the size, rotation direction (the subtract is on the left-shift), and mask op.
     ; The shift amount is bounded by a zext from i3 instead of an 'and'.
     ; The CHECK lines expect a zext of %shift to i8 feeding llvm.fshr.i8.
 359
 360 define i8 @rotate_right_8bit(i8 %v, i3 %shift) {
 361 ; CHECK-LABEL: @rotate_right_8bit(
 362 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i3 [[SHIFT:%.*]] to i8
 363 ; CHECK-NEXT:    [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
 364 ; CHECK-NEXT:    ret i8 [[CONV2]]
 365 ;
 366   %and = zext i3 %shift to i32
 367   %conv = zext i8 %v to i32
 368   %shr = lshr i32 %conv, %and
 369   %sub = sub i32 8, %and
 370   %shl = shl i32 %conv, %sub
 371   %or = or i32 %shl, %shr
 372   %conv2 = trunc i32 %or to i8
 373   ret i8 %conv2
 374 }
 375
 376 ; The shifted value does not need to be a zexted value; here it is masked.
 377 ; The shift mask could be less than the bitwidth, but this is still ok.
     ; The value is masked with 255 and the amount with 3. The CHECK lines expect
     ; both to be truncated to i8, the 'and ..., 3' kept on the narrow amount, and
     ; a single llvm.fshr.i8 call.
 378
 379 define i8 @rotate_right_commute_8bit(i32 %v, i32 %shift) {
 380 ; CHECK-LABEL: @rotate_right_commute_8bit(
 381 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i8
 382 ; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 3
 383 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[V:%.*]] to i8
 384 ; CHECK-NEXT:    [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP3]], i8 [[TMP3]], i8 [[TMP2]])
 385 ; CHECK-NEXT:    ret i8 [[CONV2]]
 386 ;
 387   %and = and i32 %shift, 3
 388   %conv = and i32 %v, 255
 389   %shr = lshr i32 %conv, %and
 390   %sub = sub i32 8, %and
 391   %shl = shl i32 %conv, %sub
 392   %or = or i32 %shr, %shl
 393   %conv2 = trunc i32 %or to i8
 394   ret i8 %conv2
 395 }
 396
 397 ; If the original source does not mask the shift amount,
 398 ; we still do the transform by adding masks to make it safe.
     ; Note that the expected output contains no explicit 'and' instruction:
     ; the CHECK lines show only a trunc of %shamt feeding llvm.fshl.i8.
 399
 400 define i8 @rotate8_not_safe(i8 %v, i32 %shamt) {
 401 ; CHECK-LABEL: @rotate8_not_safe(
 402 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i8
 403 ; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.fshl.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
 404 ; CHECK-NEXT:    ret i8 [[RET]]
 405 ;
 406   %conv = zext i8 %v to i32
 407   %sub = sub i32 8, %shamt
 408   %shr = lshr i32 %conv, %sub
 409   %shl = shl i32 %conv, %shamt
 410   %or = or i32 %shr, %shl
 411   %ret = trunc i32 %or to i8
 412   ret i8 %ret
 413 }
 414
 415 ; A non-power-of-2 destination type can't be masked as above.
     ; Same shape as rotate8_not_safe but on i9. The CHECK lines expect the
     ; zext/sub/lshr/shl/or/trunc sequence to be unchanged.
 416
 417 define i9 @rotate9_not_safe(i9 %v, i32 %shamt) {
 418 ; CHECK-LABEL: @rotate9_not_safe(
 419 ; CHECK-NEXT:    [[CONV:%.*]] = zext i9 [[V:%.*]] to i32
 420 ; CHECK-NEXT:    [[SUB:%.*]] = sub i32 9, [[SHAMT:%.*]]
 421 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONV]], [[SUB]]
 422 ; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[CONV]], [[SHAMT]]
 423 ; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
 424 ; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[OR]] to i9
 425 ; CHECK-NEXT:    ret i9 [[RET]]
 426 ;
 427   %conv = zext i9 %v to i32
 428   %sub = sub i32 9, %shamt
 429   %shr = lshr i32 %conv, %sub
 430   %shl = shl i32 %conv, %shamt
 431   %or = or i32 %shr, %shl
 432   %ret = trunc i32 %or to i9
 433   ret i9 %ret
 434 }
 435
 436 ; We should narrow (v << (s & 15)) | (v >> (-s & 15))
 437 ; when both v and s have been promoted.
     ; Both masked amounts and the value are zero-extended from i16 to i32 before
     ; shifting. The CHECK lines expect a single llvm.fshl.i16 call on %v and %shamt.
 438
 439 define i16 @rotateleft_16_neg_mask(i16 %v, i16 %shamt) {
 440 ; CHECK-LABEL: @rotateleft_16_neg_mask(
 441 ; CHECK-NEXT:    [[RET:%.*]] = call i16 @llvm.fshl.i16(i16 [[V:%.*]], i16 [[V]], i16 [[SHAMT:%.*]])
 442 ; CHECK-NEXT:    ret i16 [[RET]]
 443 ;
 444   %neg = sub i16 0, %shamt
 445   %lshamt = and i16 %shamt, 15
 446   %lshamtconv = zext i16 %lshamt to i32
 447   %rshamt = and i16 %neg, 15
 448   %rshamtconv = zext i16 %rshamt to i32
 449   %conv = zext i16 %v to i32
 450   %shl = shl i32 %conv, %lshamtconv
 451   %shr = lshr i32 %conv, %rshamtconv
 452   %or = or i32 %shr, %shl
 453   %ret = trunc i32 %or to i16
 454   ret i16 %ret
 455 }
 456
 457 define i16 @rotateleft_16_neg_mask_commute(i16 %v, i16 %shamt) {
     ; Same promoted i16 rotate-left pattern with the shl as the first 'or'
     ; operand. The CHECK lines expect the same llvm.fshl.i16 call.
 458 ; CHECK-LABEL: @rotateleft_16_neg_mask_commute(
 459 ; CHECK-NEXT:    [[RET:%.*]] = call i16 @llvm.fshl.i16(i16 [[V:%.*]], i16 [[V]], i16 [[SHAMT:%.*]])
 460 ; CHECK-NEXT:    ret i16 [[RET]]
 461 ;
 462   %neg = sub i16 0, %shamt
 463   %lshamt = and i16 %shamt, 15
 464   %lshamtconv = zext i16 %lshamt to i32
 465   %rshamt = and i16 %neg, 15
 466   %rshamtconv = zext i16 %rshamt to i32
 467   %conv = zext i16 %v to i32
 468   %shl = shl i32 %conv, %lshamtconv
 469   %shr = lshr i32 %conv, %rshamtconv
 470   %or = or i32 %shl, %shr
 471   %ret = trunc i32 %or to i16
 472   ret i16 %ret
 473 }
 474
 475 define i8 @rotateright_8_neg_mask(i8 %v, i8 %shamt) {
     ; Promoted i8 rotate-right: the masked negated amount feeds the shl and the
     ; masked %shamt feeds the lshr. The CHECK lines expect llvm.fshr.i8.
 476 ; CHECK-LABEL: @rotateright_8_neg_mask(
 477 ; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.fshr.i8(i8 [[V:%.*]], i8 [[V]], i8 [[SHAMT:%.*]])
 478 ; CHECK-NEXT:    ret i8 [[RET]]
 479 ;
 480   %neg = sub i8 0, %shamt
 481   %rshamt = and i8 %shamt, 7
 482   %rshamtconv = zext i8 %rshamt to i32
 483   %lshamt = and i8 %neg, 7
 484   %lshamtconv = zext i8 %lshamt to i32
 485   %conv = zext i8 %v to i32
 486   %shl = shl i32 %conv, %lshamtconv
 487   %shr = lshr i32 %conv, %rshamtconv
 488   %or = or i32 %shr, %shl
 489   %ret = trunc i32 %or to i8
 490   ret i8 %ret
 491 }
 492
 493 define i8 @rotateright_8_neg_mask_commute(i8 %v, i8 %shamt) {
     ; Same promoted i8 rotate-right pattern with the shl as the first 'or'
     ; operand. The CHECK lines expect the same llvm.fshr.i8 call.
 494 ; CHECK-LABEL: @rotateright_8_neg_mask_commute(
 495 ; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.fshr.i8(i8 [[V:%.*]], i8 [[V]], i8 [[SHAMT:%.*]])
 496 ; CHECK-NEXT:    ret i8 [[RET]]
 497 ;
 498   %neg = sub i8 0, %shamt
 499   %rshamt = and i8 %shamt, 7
 500   %rshamtconv = zext i8 %rshamt to i32
 501   %lshamt = and i8 %neg, 7
 502   %lshamtconv = zext i8 %lshamt to i32
 503   %conv = zext i8 %v to i32
 504   %shl = shl i32 %conv, %lshamtconv
 505   %shr = lshr i32 %conv, %rshamtconv
 506   %or = or i32 %shl, %shr
 507   %ret = trunc i32 %or to i8
 508   ret i8 %ret
 509 }
 510
 511 ; The shift amount may already be in the wide type,
 512 ; so we need to truncate it going into the rotate pattern.
     ; %shamt is i32 while the rotated value is i16. The CHECK lines expect a
     ; trunc of %shamt to i16 feeding a single llvm.fshr.i16 call.
 513
 514 define i16 @rotateright_16_neg_mask_wide_amount(i16 %v, i32 %shamt) {
 515 ; CHECK-LABEL: @rotateright_16_neg_mask_wide_amount(
 516 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i16
 517 ; CHECK-NEXT:    [[RET:%.*]] = call i16 @llvm.fshr.i16(i16 [[V:%.*]], i16 [[V]], i16 [[TMP1]])
 518 ; CHECK-NEXT:    ret i16 [[RET]]
 519 ;
 520   %neg = sub i32 0, %shamt
 521   %rshamt = and i32 %shamt, 15
 522   %lshamt = and i32 %neg, 15
 523   %conv = zext i16 %v to i32
 524   %shl = shl i32 %conv, %lshamt
 525   %shr = lshr i32 %conv, %rshamt
 526   %or = or i32 %shr, %shl
 527   %ret = trunc i32 %or to i16
 528   ret i16 %ret
 529 }
 530
 531 define i16 @rotateright_16_neg_mask_wide_amount_commute(i16 %v, i32 %shamt) {
     ; Same wide-amount i16 rotate-right pattern with the shl as the first 'or'
     ; operand. The CHECK lines expect the same trunc + llvm.fshr.i16 output.
 532 ; CHECK-LABEL: @rotateright_16_neg_mask_wide_amount_commute(
 533 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i16
 534 ; CHECK-NEXT:    [[RET:%.*]] = call i16 @llvm.fshr.i16(i16 [[V:%.*]], i16 [[V]], i16 [[TMP1]])
 535 ; CHECK-NEXT:    ret i16 [[RET]]
 536 ;
 537   %neg = sub i32 0, %shamt
 538   %rshamt = and i32 %shamt, 15
 539   %lshamt = and i32 %neg, 15
 540   %conv = zext i16 %v to i32
 541   %shl = shl i32 %conv, %lshamt
 542   %shr = lshr i32 %conv, %rshamt
 543   %or = or i32 %shl, %shr
 544   %ret = trunc i32 %or to i16
 545   ret i16 %ret
 546 }
 547
 548 define i8 @rotateleft_8_neg_mask_wide_amount(i8 %v, i32 %shamt) {
     ; Wide-amount (i32) rotate-left of an i8 value using masks of 7.
     ; The CHECK lines expect a trunc of %shamt to i8 feeding llvm.fshl.i8.
 549 ; CHECK-LABEL: @rotateleft_8_neg_mask_wide_amount(
 550 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i8
 551 ; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.fshl.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
 552 ; CHECK-NEXT:    ret i8 [[RET]]
 553 ;
 554   %neg = sub i32 0, %shamt
 555   %lshamt = and i32 %shamt, 7
 556   %rshamt = and i32 %neg, 7
 557   %conv = zext i8 %v to i32
 558   %shl = shl i32 %conv, %lshamt
 559   %shr = lshr i32 %conv, %rshamt
 560   %or = or i32 %shr, %shl
 561   %ret = trunc i32 %or to i8
 562   ret i8 %ret
 563 }
 564
 565 define i8 @rotateleft_8_neg_mask_wide_amount_commute(i8 %v, i32 %shamt) {
     ; Same wide-amount i8 rotate-left pattern with the shl as the first 'or'
     ; operand. The CHECK lines expect the same trunc + llvm.fshl.i8 output.
 566 ; CHECK-LABEL: @rotateleft_8_neg_mask_wide_amount_commute(
 567 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i8
 568 ; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.fshl.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
 569 ; CHECK-NEXT:    ret i8 [[RET]]
 570 ;
 571   %neg = sub i32 0, %shamt
 572   %lshamt = and i32 %shamt, 7
 573   %rshamt = and i32 %neg, 7
 574   %conv = zext i8 %v to i32
 575   %shl = shl i32 %conv, %lshamt
 576   %shr = lshr i32 %conv, %rshamt
 577   %or = or i32 %shl, %shr
 578   %ret = trunc i32 %or to i8
 579   ret i8 %ret
 580 }
 581
 582 ; Non-power-of-2 types. This could be transformed, but it's not a typical rotate pattern.
     ; An i9 value is widened to i33 and both amounts are masked with the constant 8.
     ; The CHECK lines expect the whole sequence to be unchanged.
 583
 584 define i9 @rotateleft_9_neg_mask_wide_amount_commute(i9 %v, i33 %shamt) {
 585 ; CHECK-LABEL: @rotateleft_9_neg_mask_wide_amount_commute(
 586 ; CHECK-NEXT:    [[NEG:%.*]] = sub i33 0, [[SHAMT:%.*]]
 587 ; CHECK-NEXT:    [[LSHAMT:%.*]] = and i33 [[SHAMT]], 8
 588 ; CHECK-NEXT:    [[RSHAMT:%.*]] = and i33 [[NEG]], 8
 589 ; CHECK-NEXT:    [[CONV:%.*]] = zext i9 [[V:%.*]] to i33
 590 ; CHECK-NEXT:    [[SHL:%.*]] = shl i33 [[CONV]], [[LSHAMT]]
 591 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i33 [[CONV]], [[RSHAMT]]
 592 ; CHECK-NEXT:    [[OR:%.*]] = or i33 [[SHL]], [[SHR]]
 593 ; CHECK-NEXT:    [[RET:%.*]] = trunc i33 [[OR]] to i9
 594 ; CHECK-NEXT:    ret i9 [[RET]]
 595 ;
 596   %neg = sub i33 0, %shamt
 597   %lshamt = and i33 %shamt, 8
 598   %rshamt = and i33 %neg, 8
 599   %conv = zext i9 %v to i33
 600   %shl = shl i33 %conv, %lshamt
 601   %shr = lshr i33 %conv, %rshamt
 602   %or = or i33 %shl, %shr
 603   %ret = trunc i33 %or to i9
 604   ret i9 %ret
 605 }
 606
 607 ; Select-guarded rotate: when %shamt is 0 the select returns %x instead of the 'or'.
     ; The CHECK lines expect the compare, subtract, shifts, 'or' and select to be
     ; replaced by a single llvm.fshr.i32 call.
 608
 609 define i32 @rotr_select(i32 %x, i32 %shamt) {
 610 ; CHECK-LABEL: @rotr_select(
 611 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[SHAMT:%.*]])
 612 ; CHECK-NEXT:    ret i32 [[R]]
 613 ;
 614   %cmp = icmp eq i32 %shamt, 0
 615   %sub = sub i32 32, %shamt
 616   %shr = lshr i32 %x, %shamt
 617   %shl = shl i32 %x, %sub
 618   %or = or i32 %shr, %shl
 619   %r = select i1 %cmp, i32 %x, i32 %or
 620   ret i32 %r
 621 }
 622
 623 ; Select-guarded rotate-right on i8 with the shl as the first 'or' operand.
     ; The CHECK lines expect the whole pattern to be replaced by a single
     ; llvm.fshr.i8 call.
 624
 625 define i8 @rotr_select_commute(i8 %x, i8 %shamt) {
 626 ; CHECK-LABEL: @rotr_select_commute(
 627 ; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.fshr.i8(i8 [[X:%.*]], i8 [[X]], i8 [[SHAMT:%.*]])
 628 ; CHECK-NEXT:    ret i8 [[R]]
 629 ;
 630   %cmp = icmp eq i8 %shamt, 0
 631   %sub = sub i8 8, %shamt
 632   %shr = lshr i8 %x, %shamt
 633   %shl = shl i8 %x, %sub
 634   %or = or i8 %shl, %shr
 635   %r = select i1 %cmp, i8 %x, i8 %or
 636   ret i8 %r
 637 }
 638
 639 ; Select-guarded rotate-left on i16: the subtract (16 - %shamt) feeds the lshr.
     ; The CHECK lines expect the whole pattern to be replaced by a single
     ; llvm.fshl.i16 call.
 640
 641 define i16 @rotl_select(i16 %x, i16 %shamt) {
 642 ; CHECK-LABEL: @rotl_select(
 643 ; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[SHAMT:%.*]])
 644 ; CHECK-NEXT:    ret i16 [[R]]
 645 ;
 646   %cmp = icmp eq i16 %shamt, 0
 647   %sub = sub i16 16, %shamt
 648   %shr = lshr i16 %x, %sub
 649   %shl = shl i16 %x, %shamt
 650   %or = or i16 %shr, %shl
 651   %r = select i1 %cmp, i16 %x, i16 %or
 652   ret i16 %r
 653 }
 654
 655 ; Select-guarded rotate-left on <2 x i64> with the shl as the first 'or' operand.
     ; The CHECK lines expect the whole pattern to be replaced by a single
     ; llvm.fshl.v2i64 call.
 656
 657 define <2 x i64> @rotl_select_commute(<2 x i64> %x, <2 x i64> %shamt) {
 658 ; CHECK-LABEL: @rotl_select_commute(
 659 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[X:%.*]], <2 x i64> [[X]], <2 x i64> [[SHAMT:%.*]])
 660 ; CHECK-NEXT:    ret <2 x i64> [[R]]
 661 ;
 662   %cmp = icmp eq <2 x i64> %shamt, zeroinitializer
 663   %sub = sub <2 x i64> <i64 64, i64 64>, %shamt
 664   %shr = lshr <2 x i64> %x, %sub
 665   %shl = shl <2 x i64> %x, %shamt
 666   %or = or <2 x i64> %shl, %shr
 667   %r = select <2 x i1> %cmp, <2 x i64> %x, <2 x i64> %or
 668   ret <2 x i64> %r
 669 }
 670
 671 ; Negative test - the transform is only valid with power-of-2 types.
     ; Same select-guarded rotate-left shape on i24. The CHECK lines expect the
     ; icmp/sub/lshr/shl/or/select sequence to be unchanged.
 672
 673 define i24 @rotl_select_weird_type(i24 %x, i24 %shamt) {
 674 ; CHECK-LABEL: @rotl_select_weird_type(
 675 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i24 [[SHAMT:%.*]], 0
 676 ; CHECK-NEXT:    [[SUB:%.*]] = sub i24 24, [[SHAMT]]
 677 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i24 [[X:%.*]], [[SUB]]
 678 ; CHECK-NEXT:    [[SHL:%.*]] = shl i24 [[X]], [[SHAMT]]
 679 ; CHECK-NEXT:    [[OR:%.*]] = or i24 [[SHL]], [[SHR]]
 680 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i24 [[X]], i24 [[OR]]
 681 ; CHECK-NEXT:    ret i24 [[R]]
 682 ;
 683   %cmp = icmp eq i24 %shamt, 0
 684   %sub = sub i24 24, %shamt
 685   %shr = lshr i24 %x, %sub
 686   %shl = shl i24 %x, %shamt
 687   %or = or i24 %shl, %shr
 688   %r = select i1 %cmp, i24 %x, i24 %or
 689   ret i24 %r
 690 }
 691
 692 ; Test that the transform doesn't crash when there's an "or" with a ConstantExpr operand.
     ; One 'or' operand is a constant-expression shl of ptrtoint(@external_global),
     ; the other is an lshr instruction. The CHECK lines expect the IR to be unchanged.
 693
 694 @external_global = external global i8
 695
 696 define i32 @rotl_constant_expr(i32 %shamt) {
 697 ; CHECK-LABEL: @rotl_constant_expr(
 698 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 ptrtoint (i8* @external_global to i32), [[SHAMT:%.*]]
 699 ; CHECK-NEXT:    [[R:%.*]] = or i32 [[SHR]], shl (i32 ptrtoint (i8* @external_global to i32), i32 11)
 700 ; CHECK-NEXT:    ret i32 [[R]]
 701 ;
 702   %shr = lshr i32 ptrtoint (i8* @external_global to i32), %shamt
 703   %r = or i32 %shr, shl (i32 ptrtoint (i8* @external_global to i32), i32 11)
 704   ret i32 %r
 705 }
 706
 707 ; PR20750 - https://bugs.llvm.org/show_bug.cgi?id=20750
 708 ; This IR corresponds to C source where the shift amount is a smaller type than the rotated value:
 709 ; unsigned int rotate32_doubleand1(unsigned int v, unsigned char r) { r = r & 31; return (v << r) | (v >> (((32 - r)) & 31)); }
     ; The i8 amount is masked with 31 and then zero-extended. The CHECK lines expect
     ; a zext of the unmasked %r feeding a single llvm.fshl.i32 call.
 710
 711 define i32 @rotateleft32_doubleand1(i32 %v, i8 %r) {
 712 ; CHECK-LABEL: @rotateleft32_doubleand1(
 713 ; CHECK-NEXT:    [[Z:%.*]] = zext i8 [[R:%.*]] to i32
 714 ; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[V:%.*]], i32 [[V]], i32 [[Z]])
 715 ; CHECK-NEXT:    ret i32 [[OR]]
 716 ;
 717   %m = and i8 %r, 31
 718   %z = zext i8 %m to i32
 719   %neg = sub nsw i32 0, %z
 720   %and2 = and i32 %neg, 31
 721   %shl = shl i32 %v, %z
 722   %shr = lshr i32 %v, %and2
 723   %or = or i32 %shr, %shl
 724   ret i32 %or
 725 }
 726
 727 define i32 @rotateright32_doubleand1(i32 %v, i16 %r) {
     ; Rotate-right counterpart with an i16 amount: the masked, zero-extended amount
     ; feeds the lshr and its masked negation feeds the shl. The CHECK lines expect
     ; a zext of the unmasked %r feeding a single llvm.fshr.i32 call.
 728 ; CHECK-LABEL: @rotateright32_doubleand1(
 729 ; CHECK-NEXT:    [[Z:%.*]] = zext i16 [[R:%.*]] to i32
 730 ; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.fshr.i32(i32 [[V:%.*]], i32 [[V]], i32 [[Z]])
 731 ; CHECK-NEXT:    ret i32 [[OR]]
 732 ;
 733   %m = and i16 %r, 31
 734   %z = zext i16 %m to i32
 735   %neg = sub nsw i32 0, %z
 736   %and2 = and i32 %neg, 31
 737   %shl = shl i32 %v, %and2
 738   %shr = lshr i32 %v, %z
 739   %or = or i32 %shr, %shl
 740   ret i32 %or
 741 }