llvm/test/Transforms/InstCombine/funnel.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s
   3
   4 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
     ; The layout string above begins with "e" (little-endian) and "p:32:32:32" (32-bit pointers).
   5
   6 ; Canonicalize or(shl,lshr) by constant to funnel shift intrinsics.
   7 ; This should help cost modeling for vectorization, inlining, etc.
   8 ; If a target does not have a fshl instruction, the expansion will
   9 ; be exactly these same 3 basic ops (shl/lshr/or).
  10
  11 define i32 @fshl_i32_constant(i32 %x, i32 %y) {
  12 ; CHECK-LABEL: @fshl_i32_constant(
  13 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 11)
  14 ; CHECK-NEXT:    ret i32 [[R]]
  15 ;
     ; Scalar i32 case: shl of %x by 11 or'ed with lshr of %y by 21.
     ; The two constants sum to the bit width (11 + 21 == 32), and the
     ; expected output is a single llvm.fshl.i32(%x, %y, 11).
  16   %shl = shl i32 %x, 11
  17   %shr = lshr i32 %y, 21
  18   %r = or i32 %shr, %shl
  19   ret i32 %r
  20 }
  21
  22 define i42 @fshr_i42_constant(i42 %x, i42 %y) {
  23 ; CHECK-LABEL: @fshr_i42_constant(
  24 ; CHECK-NEXT:    [[R:%.*]] = call i42 @llvm.fshl.i42(i42 [[Y:%.*]], i42 [[X:%.*]], i42 11)
  25 ; CHECK-NEXT:    ret i42 [[R]]
  26 ;
     ; Non-power-of-2 scalar width: lshr of %x by 31 or'ed with shl of %y by 11
     ; (31 + 11 == 42). The expected output is written as fshl(%y, %x, 11),
     ; i.e. with the left-shift amount, not as an fshr call.
  27   %shr = lshr i42 %x, 31
  28   %shl = shl i42 %y, 11
  29   %r = or i42 %shr, %shl
  30   ret i42 %r
  31 }
  32
  33 ; Vector types are allowed.
  34
  35 define <2 x i16> @fshl_v2i16_constant_splat(<2 x i16> %x, <2 x i16> %y) {
  36 ; CHECK-LABEL: @fshl_v2i16_constant_splat(
  37 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> <i16 1, i16 1>)
  38 ; CHECK-NEXT:    ret <2 x i16> [[R]]
  39 ;
     ; Vector version with splat shift amounts: shl by 1 and lshr by 15 in
     ; every lane (1 + 15 == 16). Expected output: fshl with splat amount 1.
  40   %shl = shl <2 x i16> %x, <i16 1, i16 1>
  41   %shr = lshr <2 x i16> %y, <i16 15, i16 15>
  42   %r = or <2 x i16> %shl, %shr
  43   ret <2 x i16> %r
  44 }
  45
  46 define <2 x i16> @fshl_v2i16_constant_splat_poison0(<2 x i16> %x, <2 x i16> %y) {
  47 ; CHECK-LABEL: @fshl_v2i16_constant_splat_poison0(
  48 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> <i16 1, i16 1>)
  49 ; CHECK-NEXT:    ret <2 x i16> [[R]]
  50 ;
     ; Splat case where lane 0 of the shl amount is poison. The expected
     ; funnel-shift amount is still the fully defined splat <1, 1>.
  51   %shl = shl <2 x i16> %x, <i16 poison, i16 1>
  52   %shr = lshr <2 x i16> %y, <i16 15, i16 15>
  53   %r = or <2 x i16> %shl, %shr
  54   ret <2 x i16> %r
  55 }
  56
  57 define <2 x i16> @fshl_v2i16_constant_splat_poison1(<2 x i16> %x, <2 x i16> %y) {
  58 ; CHECK-LABEL: @fshl_v2i16_constant_splat_poison1(
  59 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> <i16 1, i16 1>)
  60 ; CHECK-NEXT:    ret <2 x i16> [[R]]
  61 ;
     ; Splat case where lane 1 of the lshr amount is poison. The expected
     ; funnel-shift amount is still the fully defined splat <1, 1>.
  62   %shl = shl <2 x i16> %x, <i16 1, i16 1>
  63   %shr = lshr <2 x i16> %y, <i16 15, i16 poison>
  64   %r = or <2 x i16> %shl, %shr
  65   ret <2 x i16> %r
  66 }
  67
  68 ; Non-power-of-2 vector types are allowed.
  69
  70 define <2 x i17> @fshr_v2i17_constant_splat(<2 x i17> %x, <2 x i17> %y) {
  71 ; CHECK-LABEL: @fshr_v2i17_constant_splat(
  72 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> <i17 5, i17 5>)
  73 ; CHECK-NEXT:    ret <2 x i17> [[R]]
  74 ;
     ; i17 element type: lshr of %x by 12 or'ed with shl of %y by 5 in every
     ; lane (12 + 5 == 17). Expected output: fshl(%y, %x, splat 5).
  75   %shr = lshr <2 x i17> %x, <i17 12, i17 12>
  76   %shl = shl <2 x i17> %y, <i17 5, i17 5>
  77   %r = or <2 x i17> %shr, %shl
  78   ret <2 x i17> %r
  79 }
  80
  81 define <2 x i17> @fshr_v2i17_constant_splat_poison0(<2 x i17> %x, <2 x i17> %y) {
  82 ; CHECK-LABEL: @fshr_v2i17_constant_splat_poison0(
  83 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> <i17 5, i17 5>)
  84 ; CHECK-NEXT:    ret <2 x i17> [[R]]
  85 ;
     ; The poison elements sit in different lanes of the two amounts: lane 1
     ; of the lshr amount and lane 0 of the shl amount. The expected
     ; funnel-shift amount is the fully defined splat <5, 5>.
  86   %shr = lshr <2 x i17> %x, <i17 12, i17 poison>
  87   %shl = shl <2 x i17> %y, <i17 poison, i17 5>
  88   %r = or <2 x i17> %shr, %shl
  89   ret <2 x i17> %r
  90 }
  91
  92 define <2 x i17> @fshr_v2i17_constant_splat_poison1(<2 x i17> %x, <2 x i17> %y) {
  93 ; CHECK-LABEL: @fshr_v2i17_constant_splat_poison1(
  94 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> <i17 5, i17 5>)
  95 ; CHECK-NEXT:    ret <2 x i17> [[R]]
  96 ;
     ; Both shift amounts have poison in the same lane (lane 1). The expected
     ; funnel-shift amount is still the fully defined splat <5, 5>.
  97   %shr = lshr <2 x i17> %x, <i17 12, i17 poison>
  98   %shl = shl <2 x i17> %y, <i17 5, i17 poison>
  99   %r = or <2 x i17> %shr, %shl
 100   ret <2 x i17> %r
 101 }
 102
 103 ; Allow arbitrary shift constants.
 104 ; Support poison elements.
 105
 106 define <2 x i32> @fshr_v2i32_constant_nonsplat(<2 x i32> %x, <2 x i32> %y) {
 107 ; CHECK-LABEL: @fshr_v2i32_constant_nonsplat(
 108 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> [[X:%.*]], <2 x i32> <i32 15, i32 13>)
 109 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 110 ;
     ; Per-lane (non-splat) amounts: lane 0 uses 17/15 and lane 1 uses 19/13,
     ; each pair summing to 32. Expected output: fshl(%y, %x, <15, 13>).
 111   %shr = lshr <2 x i32> %x, <i32 17, i32 19>
 112   %shl = shl <2 x i32> %y, <i32 15, i32 13>
 113   %r = or <2 x i32> %shl, %shr
 114   ret <2 x i32> %r
 115 }
 116
 117 define <2 x i32> @fshr_v2i32_constant_nonsplat_poison0(<2 x i32> %x, <2 x i32> %y) {
 118 ; CHECK-LABEL: @fshr_v2i32_constant_nonsplat_poison0(
 119 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> [[X:%.*]], <2 x i32> <i32 0, i32 13>)
 120 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 121 ;
     ; Non-splat amounts with poison in lane 0 of the lshr amount. The expected
     ; funnel-shift amount for that lane is 0 (not the shl amount 15); lane 1
     ; keeps 13.
 122   %shr = lshr <2 x i32> %x, <i32 poison, i32 19>
 123   %shl = shl <2 x i32> %y, <i32 15, i32 13>
 124   %r = or <2 x i32> %shl, %shr
 125   ret <2 x i32> %r
 126 }
 127
 128 define <2 x i32> @fshr_v2i32_constant_nonsplat_poison1(<2 x i32> %x, <2 x i32> %y) {
 129 ; CHECK-LABEL: @fshr_v2i32_constant_nonsplat_poison1(
 130 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> [[X:%.*]], <2 x i32> <i32 15, i32 poison>)
 131 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 132 ;
     ; Non-splat amounts with poison in lane 1 of the shl amount. The expected
     ; funnel-shift amount carries that poison through: <15, poison>.
 133   %shr = lshr <2 x i32> %x, <i32 17, i32 19>
 134   %shl = shl <2 x i32> %y, <i32 15, i32 poison>
 135   %r = or <2 x i32> %shl, %shr
 136   ret <2 x i32> %r
 137 }
 138
 139 define <2 x i36> @fshl_v2i36_constant_nonsplat(<2 x i36> %x, <2 x i36> %y) {
 140 ; CHECK-LABEL: @fshl_v2i36_constant_nonsplat(
 141 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i36> @llvm.fshl.v2i36(<2 x i36> [[X:%.*]], <2 x i36> [[Y:%.*]], <2 x i36> <i36 21, i36 11>)
 142 ; CHECK-NEXT:    ret <2 x i36> [[R]]
 143 ;
     ; Non-splat amounts on an i36 element type: lane 0 uses 21/15 and lane 1
     ; uses 11/25, each pair summing to 36. Expected output:
     ; fshl(%x, %y, <21, 11>).
 144   %shl = shl <2 x i36> %x, <i36 21, i36 11>
 145   %shr = lshr <2 x i36> %y, <i36 15, i36 25>
 146   %r = or <2 x i36> %shl, %shr
 147   ret <2 x i36> %r
 148 }
 149
 150 define <3 x i36> @fshl_v3i36_constant_nonsplat_poison0(<3 x i36> %x, <3 x i36> %y) {
 151 ; CHECK-LABEL: @fshl_v3i36_constant_nonsplat_poison0(
 152 ; CHECK-NEXT:    [[R:%.*]] = call <3 x i36> @llvm.fshl.v3i36(<3 x i36> [[X:%.*]], <3 x i36> [[Y:%.*]], <3 x i36> <i36 21, i36 11, i36 poison>)
 153 ; CHECK-NEXT:    ret <3 x i36> [[R]]
 154 ;
     ; Three-element vector where lane 2 is poison in both shift amounts.
     ; The expected funnel-shift amount keeps poison in that lane:
     ; <21, 11, poison>.
 155   %shl = shl <3 x i36> %x, <i36 21, i36 11, i36 poison>
 156   %shr = lshr <3 x i36> %y, <i36 15, i36 25, i36 poison>
 157   %r = or <3 x i36> %shl, %shr
 158   ret <3 x i36> %r
 159 }
 160
 161 ; Fold or(shl(x,a),lshr(y,bw-a)) -> fshl(x,y,a) iff a < bw
 162
 163 define i64 @fshl_sub_mask(i64 %x, i64 %y, i64 %a) {
 164 ; CHECK-LABEL: @fshl_sub_mask(
 165 ; CHECK-NEXT:    [[R:%.*]] = call i64 @llvm.fshl.i64(i64 [[X:%.*]], i64 [[Y:%.*]], i64 [[A:%.*]])
 166 ; CHECK-NEXT:    ret i64 [[R]]
 167 ;
     ; Variable shift amount: %a is masked with 63, %x is shifted left by the
     ; masked amount and %y is shifted right by 64 minus the masked amount.
     ; The expected output passes the unmasked %a straight to llvm.fshl.i64;
     ; no 'and' remains.
 168   %mask = and i64 %a, 63
 169   %shl = shl i64 %x, %mask
 170   %sub = sub nuw nsw i64 64, %mask
 171   %shr = lshr i64 %y, %sub
 172   %r = or i64 %shl, %shr
 173   ret i64 %r
 174 }
 175
 176 ; Fold or(lshr(x,a),shl(y,bw-a)) -> fshr(y,x,a) iff a < bw
 177
 178 define i64 @fshr_sub_mask(i64 %x, i64 %y, i64 %a) {
 179 ; CHECK-LABEL: @fshr_sub_mask(
 180 ; CHECK-NEXT:    [[R:%.*]] = call i64 @llvm.fshr.i64(i64 [[Y:%.*]], i64 [[X:%.*]], i64 [[A:%.*]])
 181 ; CHECK-NEXT:    ret i64 [[R]]
 182 ;
     ; Variable shift amount, right-shift form: %x is shifted right by %a
     ; masked with 63 and %y is shifted left by 64 minus the masked amount.
     ; The expected output is llvm.fshr.i64(%y, %x, %a) with the unmasked %a.
 183   %mask = and i64 %a, 63
 184   %shr = lshr i64 %x, %mask
 185   %sub = sub nuw nsw i64 64, %mask
 186   %shl = shl i64 %y, %sub
 187   %r = or i64 %shl, %shr
 188   ret i64 %r
 189 }
 190
 191 define <2 x i64> @fshr_sub_mask_vector(<2 x i64> %x, <2 x i64> %y, <2 x i64> %a) {
 192 ; CHECK-LABEL: @fshr_sub_mask_vector(
 193 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> [[Y:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[A:%.*]])
 194 ; CHECK-NEXT:    ret <2 x i64> [[R]]
 195 ;
     ; <2 x i64> version of the masked variable-amount right-shift pattern,
     ; using splat constants 63 and 64. Expected output:
     ; llvm.fshr.v2i64(%y, %x, %a) with the unmasked %a.
 196   %mask = and <2 x i64> %a, <i64 63, i64 63>
 197   %shr = lshr <2 x i64> %x, %mask
 198   %sub = sub nuw nsw <2 x i64> <i64 64, i64 64>, %mask
 199   %shl = shl <2 x i64> %y, %sub
 200   %r = or <2 x i64> %shl, %shr
 201   ret <2 x i64> %r
 202 }
 203
 204 ; PR35155 - these are optionally UB-free funnel shift left/right patterns that are narrowed to a smaller bitwidth.
 205
 206 define i16 @fshl_16bit(i16 %x, i16 %y, i32 %shift) {
 207 ; CHECK-LABEL: @fshl_16bit(
 208 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i16
 209 ; CHECK-NEXT:    [[CONV2:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[Y:%.*]], i16 [[TMP1]])
 210 ; CHECK-NEXT:    ret i16 [[CONV2]]
 211 ;
     ; The i16 inputs are zero-extended to i32, shifted by %shift masked with
     ; 15 (and by 16 minus that), or'ed, then truncated back to i16.
     ; Expected output: the shift amount is truncated to i16 and the whole
     ; sequence becomes one llvm.fshl.i16 on the original i16 values.
 212   %and = and i32 %shift, 15
 213   %convx = zext i16 %x to i32
 214   %shl = shl i32 %convx, %and
 215   %sub = sub i32 16, %and
 216   %convy = zext i16 %y to i32
 217   %shr = lshr i32 %convy, %sub
 218   %or = or i32 %shr, %shl
 219   %conv2 = trunc i32 %or to i16
 220   ret i16 %conv2
 221 }
 222
 223 ; Commute the 'or' operands and try a vector type.
 224
 225 define <2 x i16> @fshl_commute_16bit_vec(<2 x i16> %x, <2 x i16> %y, <2 x i32> %shift) {
 226 ; CHECK-LABEL: @fshl_commute_16bit_vec(
 227 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i32> [[SHIFT:%.*]] to <2 x i16>
 228 ; CHECK-NEXT:    [[CONV2:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> [[TMP1]])
 229 ; CHECK-NEXT:    ret <2 x i16> [[CONV2]]
 230 ;
     ; Vector form of the widened i16 pattern, with the 'or' operands in the
     ; opposite order (%shl first). Expected output: truncate the shift
     ; amount to <2 x i16> and emit llvm.fshl.v2i16(%x, %y, ...).
 231   %and = and <2 x i32> %shift, <i32 15, i32 15>
 232   %convx = zext <2 x i16> %x to <2 x i32>
 233   %shl = shl <2 x i32> %convx, %and
 234   %sub = sub <2 x i32> <i32 16, i32 16>, %and
 235   %convy = zext <2 x i16> %y to <2 x i32>
 236   %shr = lshr <2 x i32> %convy, %sub
 237   %or = or <2 x i32> %shl, %shr
 238   %conv2 = trunc <2 x i32> %or to <2 x i16>
 239   ret <2 x i16> %conv2
 240 }
 241
 242 ; Change the size, shift direction (the subtract is on the left-shift), and mask op.
 243
 244 define i8 @fshr_8bit(i8 %x, i8 %y, i3 %shift) {
 245 ; CHECK-LABEL: @fshr_8bit(
 246 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i3 [[SHIFT:%.*]] to i8
 247 ; CHECK-NEXT:    [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[Y:%.*]], i8 [[X:%.*]], i8 [[TMP1]])
 248 ; CHECK-NEXT:    ret i8 [[CONV2]]
 249 ;
     ; Here the shift amount is bounded by a zext from i3 (so it is at most 7)
     ; rather than by an 'and'; despite its name, %and is that zext.
     ; %x is shifted right by it and %y left by 8 minus it, all in i32.
     ; Expected output: llvm.fshr.i8(%y, %x, zext of %shift to i8).
 250   %and = zext i3 %shift to i32
 251   %convx = zext i8 %x to i32
 252   %shr = lshr i32 %convx, %and
 253   %sub = sub i32 8, %and
 254   %convy = zext i8 %y to i32
 255   %shl = shl i32 %convy, %sub
 256   %or = or i32 %shl, %shr
 257   %conv2 = trunc i32 %or to i8
 258   ret i8 %conv2
 259 }
 260
 261 ; The right-shifted value does not need to be a zexted value; here it is masked.
 262 ; The shift mask could be less than the bitwidth, but this is still ok.
 263
 264 define i8 @fshr_commute_8bit(i32 %x, i32 %y, i32 %shift) {
 265 ; CHECK-LABEL: @fshr_commute_8bit(
 266 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i8
 267 ; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 3
 268 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[Y:%.*]] to i8
 269 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[X:%.*]] to i8
 270 ; CHECK-NEXT:    [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP3]], i8 [[TMP4]], i8 [[TMP2]])
 271 ; CHECK-NEXT:    ret i8 [[CONV2]]
 272 ;
     ; The inputs are i32 values masked with 255 instead of zexts, and the
     ; shift amount is masked with 3 (smaller than the maximum in-range
     ; amount 7 for an 8-bit shift). The expected output truncates both
     ; values and the shift amount to i8, keeps the 'and ... 3' on the
     ; amount, and emits llvm.fshr.i8.
 273   %and = and i32 %shift, 3
 274   %convx = and i32 %x, 255
 275   %shr = lshr i32 %convx, %and
 276   %sub = sub i32 8, %and
 277   %convy = and i32 %y, 255
 278   %shl = shl i32 %convy, %sub
 279   %or = or i32 %shr, %shl
 280   %conv2 = trunc i32 %or to i8
 281   ret i8 %conv2
 282 }
 283
 284 ; The left-shifted value does not need to be masked at all.
 285
 286 define i8 @fshr_commute_8bit_unmasked_shl(i32 %x, i32 %y, i32 %shift) {
 287 ; CHECK-LABEL: @fshr_commute_8bit_unmasked_shl(
 288 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i8
 289 ; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 3
 290 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[Y:%.*]] to i8
 291 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[X:%.*]] to i8
 292 ; CHECK-NEXT:    [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP3]], i8 [[TMP4]], i8 [[TMP2]])
 293 ; CHECK-NEXT:    ret i8 [[CONV2]]
 294 ;
     ; Same shape as fshr_commute_8bit, except that %shl shifts the unmasked
     ; %y. %convy is still computed but has no users. The expected output is
     ; the same llvm.fshr.i8 sequence as in the masked variant.
 295   %and = and i32 %shift, 3
 296   %convx = and i32 %x, 255
 297   %shr = lshr i32 %convx, %and
 298   %sub = sub i32 8, %and
 299   %convy = and i32 %y, 255
 300   %shl = shl i32 %y, %sub
 301   %or = or i32 %shr, %shl
 302   %conv2 = trunc i32 %or to i8
 303   ret i8 %conv2
 304 }
 305
 306 ; Convert select pattern to funnel shift that ends in 'or'.
 307
 308 define i8 @fshr_select(i8 %x, i8 %y, i8 %shamt) {
 309 ; CHECK-LABEL: @fshr_select(
 310 ; CHECK-NEXT:    [[TMP1:%.*]] = freeze i8 [[X:%.*]]
 311 ; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP1]], i8 [[Y:%.*]], i8 [[SHAMT:%.*]])
 312 ; CHECK-NEXT:    ret i8 [[R]]
 313 ;
     ; The select returns %y when %shamt is 0 and otherwise
     ; or(shl(%x, 8 - %shamt), lshr(%y, %shamt)). The expected output is
     ; llvm.fshr.i8 with %x frozen, and no compare or select.
 314   %cmp = icmp eq i8 %shamt, 0
 315   %sub = sub i8 8, %shamt
 316   %shr = lshr i8 %y, %shamt
 317   %shl = shl i8 %x, %sub
 318   %or = or i8 %shl, %shr
 319   %r = select i1 %cmp, i8 %y, i8 %or
 320   ret i8 %r
 321 }
 322
 323 ; Convert select pattern to funnel shift that ends in 'or' (fshl variant).
 324
 325 define i16 @fshl_select(i16 %x, i16 %y, i16 %shamt) {
 326 ; CHECK-LABEL: @fshl_select(
 327 ; CHECK-NEXT:    [[TMP1:%.*]] = freeze i16 [[Y:%.*]]
 328 ; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[TMP1]], i16 [[SHAMT:%.*]])
 329 ; CHECK-NEXT:    ret i16 [[R]]
 330 ;
     ; The select returns %x when %shamt is 0 and otherwise
     ; or(lshr(%y, 16 - %shamt), shl(%x, %shamt)). The expected output is
     ; llvm.fshl.i16 with %y frozen, and no compare or select.
 331   %cmp = icmp eq i16 %shamt, 0
 332   %sub = sub i16 16, %shamt
 333   %shr = lshr i16 %y, %sub
 334   %shl = shl i16 %x, %shamt
 335   %or = or i16 %shr, %shl
 336   %r = select i1 %cmp, i16 %x, i16 %or
 337   ret i16 %r
 338 }
 339
 340 ; Convert select pattern to funnel shift that ends in 'or' (vector type).
 341
 342 define <2 x i64> @fshl_select_vector(<2 x i64> %x, <2 x i64> %y, <2 x i64> %shamt) {
 343 ; CHECK-LABEL: @fshl_select_vector(
 344 ; CHECK-NEXT:    [[TMP1:%.*]] = freeze <2 x i64> [[X:%.*]]
 345 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[Y:%.*]], <2 x i64> [[TMP1]], <2 x i64> [[SHAMT:%.*]])
 346 ; CHECK-NEXT:    ret <2 x i64> [[R]]
 347 ;
     ; Vector select pattern in which %y is the left-shifted value (and the
     ; value returned for a zero shift amount) and %x is the right-shifted
     ; one. Expected output: llvm.fshl.v2i64(%y, freeze %x, %shamt).
 348   %cmp = icmp eq <2 x i64> %shamt, zeroinitializer
 349   %sub = sub <2 x i64> <i64 64, i64 64>, %shamt
 350   %shr = lshr <2 x i64> %x, %sub
 351   %shl = shl <2 x i64> %y, %shamt
 352   %or = or <2 x i64> %shl, %shr
 353   %r = select <2 x i1> %cmp, <2 x i64> %y, <2 x i64> %or
 354   ret <2 x i64> %r
 355 }
 356
 357 ; Convert 'or concat' to fshl if opposite 'or concat' exists.
 358
 359 define i32 @fshl_concat_i8_i24(i8 %x, i24 %y, ptr %addr) {
 360 ; CHECK-LABEL: @fshl_concat_i8_i24(
 361 ; CHECK-NEXT:    [[ZEXT_X:%.*]] = zext i8 [[X:%.*]] to i32
 362 ; CHECK-NEXT:    [[SLX:%.*]] = shl nuw i32 [[ZEXT_X]], 24
 363 ; CHECK-NEXT:    [[ZEXT_Y:%.*]] = zext i24 [[Y:%.*]] to i32
 364 ; CHECK-NEXT:    [[XY:%.*]] = or disjoint i32 [[SLX]], [[ZEXT_Y]]
 365 ; CHECK-NEXT:    store i32 [[XY]], ptr [[ADDR:%.*]], align 4
 366 ; CHECK-NEXT:    [[YX:%.*]] = call i32 @llvm.fshl.i32(i32 [[XY]], i32 [[XY]], i32 8)
 367 ; CHECK-NEXT:    ret i32 [[YX]]
 368 ;
     ; %xy places the i8 %x above the i24 %y in an i32 and is stored, so it
     ; has a use other than the return. %yx is the opposite order: %y shifted
     ; left by 8 with %x in the low bits. Expected output: %yx is computed
     ; from %xy as llvm.fshl.i32(%xy, %xy, 8).
 369   %zext.x = zext i8 %x to i32
 370   %slx = shl i32 %zext.x, 24
 371   %zext.y = zext i24 %y to i32
 372   %xy = or i32 %zext.y, %slx
 373   store i32 %xy, ptr %addr, align 4
 374   %sly = shl i32 %zext.y, 8
 375   %yx = or i32 %zext.x, %sly
 376   ret i32 %yx
 377 }
 378
 379 define i32 @fshl_concat_i8_i8(i8 %x, i8 %y, ptr %addr) {
 380 ; CHECK-LABEL: @fshl_concat_i8_i8(
 381 ; CHECK-NEXT:    [[ZEXT_X:%.*]] = zext i8 [[X:%.*]] to i32
 382 ; CHECK-NEXT:    [[SLX:%.*]] = shl nuw nsw i32 [[ZEXT_X]], 13
 383 ; CHECK-NEXT:    [[ZEXT_Y:%.*]] = zext i8 [[Y:%.*]] to i32
 384 ; CHECK-NEXT:    [[XY:%.*]] = or disjoint i32 [[SLX]], [[ZEXT_Y]]
 385 ; CHECK-NEXT:    store i32 [[XY]], ptr [[ADDR:%.*]], align 4
 386 ; CHECK-NEXT:    [[YX:%.*]] = call i32 @llvm.fshl.i32(i32 [[XY]], i32 [[XY]], i32 19)
 387 ; CHECK-NEXT:    ret i32 [[YX]]
 388 ;
     ; Two i8 values that do not fill the i32: %x is shifted left by 13 in
     ; %xy and %y is shifted left by 19 in %yx (13 + 19 == 32).
     ; Expected output: %yx becomes llvm.fshl.i32(%xy, %xy, 19).
 389   %zext.x = zext i8 %x to i32
 390   %slx = shl i32 %zext.x, 13
 391   %zext.y = zext i8 %y to i32
 392   %xy = or i32 %zext.y, %slx
 393   store i32 %xy, ptr %addr, align 4
 394   %sly = shl i32 %zext.y, 19
 395   %yx = or i32 %zext.x, %sly
 396   ret i32 %yx
 397 }
 398
 399 define i32 @fshl_concat_i8_i8_overlap(i8 %x, i8 %y, ptr %addr) {
 400 ; CHECK-LABEL: @fshl_concat_i8_i8_overlap(
 401 ; CHECK-NEXT:    [[ZEXT_X:%.*]] = zext i8 [[X:%.*]] to i32
 402 ; CHECK-NEXT:    [[SLX:%.*]] = shl i32 [[ZEXT_X]], 25
 403 ; CHECK-NEXT:    [[ZEXT_Y:%.*]] = zext i8 [[Y:%.*]] to i32
 404 ; CHECK-NEXT:    [[XY:%.*]] = or disjoint i32 [[SLX]], [[ZEXT_Y]]
 405 ; CHECK-NEXT:    store i32 [[XY]], ptr [[ADDR:%.*]], align 4
 406 ; CHECK-NEXT:    [[SLY:%.*]] = shl nuw nsw i32 [[ZEXT_Y]], 7
 407 ; CHECK-NEXT:    [[YX:%.*]] = or i32 [[SLY]], [[ZEXT_X]]
 408 ; CHECK-NEXT:    ret i32 [[YX]]
 409 ;
     ; Negative case: the shift amounts 25 and 7 sum to 32, but shifting the
     ; zero-extended i8 %y left by only 7 leaves it overlapping bit 7 of the
     ; zero-extended %x. The expected output keeps the shl/or for %yx; no
     ; funnel shift is formed.
 410   ; Test sly overlap.
 411   %zext.x = zext i8 %x to i32
 412   %slx = shl i32 %zext.x, 25
 413   %zext.y = zext i8 %y to i32
 414   %xy = or i32 %zext.y, %slx
 415   store i32 %xy, ptr %addr, align 4
 416   %sly = shl i32 %zext.y, 7
 417   %yx = or i32 %zext.x, %sly
 418   ret i32 %yx
 419 }
 420
 421 define i32 @fshl_concat_i8_i8_drop(i8 %x, i8 %y, ptr %addr) {
 422 ; CHECK-LABEL: @fshl_concat_i8_i8_drop(
 423 ; CHECK-NEXT:    [[ZEXT_X:%.*]] = zext i8 [[X:%.*]] to i32
 424 ; CHECK-NEXT:    [[SLX:%.*]] = shl nuw nsw i32 [[ZEXT_X]], 7
 425 ; CHECK-NEXT:    [[ZEXT_Y:%.*]] = zext i8 [[Y:%.*]] to i32
 426 ; CHECK-NEXT:    [[XY:%.*]] = or i32 [[SLX]], [[ZEXT_Y]]
 427 ; CHECK-NEXT:    store i32 [[XY]], ptr [[ADDR:%.*]], align 4
 428 ; CHECK-NEXT:    [[SLY:%.*]] = shl i32 [[ZEXT_Y]], 25
 429 ; CHECK-NEXT:    [[YX:%.*]] = or disjoint i32 [[SLY]], [[ZEXT_X]]
 430 ; CHECK-NEXT:    ret i32 [[YX]]
 431 ;
     ; Negative case: the shift amounts 7 and 25 sum to 32, but shifting the
     ; zero-extended i8 %y left by 25 moves its top bit out of the i32 (the
     ; expected shl carries no nuw flag). The expected output keeps the
     ; shl/or for %yx; no funnel shift is formed.
 432   ; Test sly drop.
 433   %zext.x = zext i8 %x to i32
 434   %slx = shl i32 %zext.x, 7
 435   %zext.y = zext i8 %y to i32
 436   %xy = or i32 %zext.y, %slx
 437   store i32 %xy, ptr %addr, align 4
 438   %sly = shl i32 %zext.y, 25
 439   %yx = or i32 %zext.x, %sly
 440   ret i32 %yx
 441 }
 442
 443 define i32 @fshl_concat_i8_i8_different_slot(i8 %x, i8 %y, ptr %addr) {
 444 ; CHECK-LABEL: @fshl_concat_i8_i8_different_slot(
 445 ; CHECK-NEXT:    [[ZEXT_X:%.*]] = zext i8 [[X:%.*]] to i32
 446 ; CHECK-NEXT:    [[SLX:%.*]] = shl nuw nsw i32 [[ZEXT_X]], 9
 447 ; CHECK-NEXT:    [[ZEXT_Y:%.*]] = zext i8 [[Y:%.*]] to i32
 448 ; CHECK-NEXT:    [[XY:%.*]] = or disjoint i32 [[SLX]], [[ZEXT_Y]]
 449 ; CHECK-NEXT:    store i32 [[XY]], ptr [[ADDR:%.*]], align 4
 450 ; CHECK-NEXT:    [[SLY:%.*]] = shl nuw nsw i32 [[ZEXT_Y]], 22
 451 ; CHECK-NEXT:    [[YX:%.*]] = or disjoint i32 [[SLY]], [[ZEXT_X]]
 452 ; CHECK-NEXT:    ret i32 [[YX]]
 453 ;
     ; Negative case: the shift amounts 9 and 22 sum to 31, not the bit width
     ; 32, so %yx is not a rotation of %xy. The expected output keeps the
     ; shl/or for %yx; no funnel shift is formed.
 454   %zext.x = zext i8 %x to i32
 455   %slx = shl i32 %zext.x, 9
 456   %zext.y = zext i8 %y to i32
 457   %xy = or i32 %zext.y, %slx
 458   store i32 %xy, ptr %addr, align 4
 459   %sly = shl i32 %zext.y, 22
 460   %yx = or i32 %zext.x, %sly
 461   ret i32 %yx
 462 }
 463
 464 define i32 @fshl_concat_unknown_source(i32 %zext.x, i32 %zext.y, ptr %addr) {
 465 ; CHECK-LABEL: @fshl_concat_unknown_source(
 466 ; CHECK-NEXT:    [[SLX:%.*]] = shl i32 [[ZEXT_X:%.*]], 16
 467 ; CHECK-NEXT:    [[XY:%.*]] = or i32 [[SLX]], [[ZEXT_Y:%.*]]
 468 ; CHECK-NEXT:    store i32 [[XY]], ptr [[ADDR:%.*]], align 4
 469 ; CHECK-NEXT:    [[SLY:%.*]] = shl i32 [[ZEXT_Y]], 16
 470 ; CHECK-NEXT:    [[YX:%.*]] = or i32 [[SLY]], [[ZEXT_X]]
 471 ; CHECK-NEXT:    ret i32 [[YX]]
 472 ;
     ; Negative case: despite their names, %zext.x and %zext.y are plain i32
     ; arguments rather than zero-extensions, so nothing is known about
     ; their high bits. Both shifts are by 16. The expected output keeps the
     ; shl/or for %yx; no funnel shift is formed.
 473   %slx = shl i32 %zext.x, 16
 474   %xy = or i32 %zext.y, %slx
 475   store i32 %xy, ptr %addr, align 4
 476   %sly = shl i32 %zext.y, 16
 477   %yx = or i32 %zext.x, %sly
 478   ret i32 %yx
 479 }
 480
 481 define <2 x i32> @fshl_concat_vector(<2 x i8> %x, <2 x i24> %y, ptr %addr) {
 482 ; CHECK-LABEL: @fshl_concat_vector(
 483 ; CHECK-NEXT:    [[ZEXT_X:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
 484 ; CHECK-NEXT:    [[SLX:%.*]] = shl nuw <2 x i32> [[ZEXT_X]], <i32 24, i32 24>
 485 ; CHECK-NEXT:    [[ZEXT_Y:%.*]] = zext <2 x i24> [[Y:%.*]] to <2 x i32>
 486 ; CHECK-NEXT:    [[XY:%.*]] = or disjoint <2 x i32> [[SLX]], [[ZEXT_Y]]
 487 ; CHECK-NEXT:    store <2 x i32> [[XY]], ptr [[ADDR:%.*]], align 4
 488 ; CHECK-NEXT:    [[YX:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[XY]], <2 x i32> [[XY]], <2 x i32> <i32 8, i32 8>)
 489 ; CHECK-NEXT:    ret <2 x i32> [[YX]]
 490 ;
     ; <2 x i32> version of the i8/i24 concat pattern with splat shift
     ; amounts 24 and 8. Expected output: %yx becomes
     ; llvm.fshl.v2i32(%xy, %xy, splat 8).
 491   %zext.x = zext <2 x i8> %x to <2 x i32>
 492   %slx = shl <2 x i32> %zext.x, <i32 24, i32 24>
 493   %zext.y = zext <2 x i24> %y to <2 x i32>
 494   %xy = or <2 x i32> %slx, %zext.y
 495   store <2 x i32> %xy, ptr %addr, align 4
 496   %sly = shl <2 x i32> %zext.y, <i32 8, i32 8>
 497   %yx = or <2 x i32> %sly, %zext.x
 498   ret <2 x i32> %yx
 499 }
 500
 501 ; Negative test - an oversized shift in the narrow type would produce the wrong value.
 502
 503 define i8 @unmasked_shlop_unmasked_shift_amount(i32 %x, i32 %y, i32 %shamt) {
 504 ; CHECK-LABEL: @unmasked_shlop_unmasked_shift_amount(
 505 ; CHECK-NEXT:    [[MASKY:%.*]] = and i32 [[Y:%.*]], 255
 506 ; CHECK-NEXT:    [[T4:%.*]] = sub i32 8, [[SHAMT:%.*]]
 507 ; CHECK-NEXT:    [[T5:%.*]] = shl i32 [[X:%.*]], [[T4]]
 508 ; CHECK-NEXT:    [[T6:%.*]] = lshr i32 [[MASKY]], [[SHAMT]]
 509 ; CHECK-NEXT:    [[T7:%.*]] = or i32 [[T5]], [[T6]]
 510 ; CHECK-NEXT:    [[T8:%.*]] = trunc i32 [[T7]] to i8
 511 ; CHECK-NEXT:    ret i8 [[T8]]
 512 ;
     ; %shamt is used without any mask, and the left-shifted operand %x is
     ; not masked either. The expected output is the same instruction
     ; sequence as the input; no funnel shift is formed.
 513   %masky = and i32 %y, 255
 514   %t4 = sub i32 8, %shamt
 515   %t5 = shl i32 %x, %t4
 516   %t6 = lshr i32 %masky, %shamt
 517   %t7 = or i32 %t5, %t6
 518   %t8 = trunc i32 %t7 to i8
 519   ret i8 %t8
 520 }
 521
 522 ; Negative test - an oversized shift in the narrow type would produce the wrong value.
 523
 524 define i8 @unmasked_shlop_insufficient_mask_shift_amount(i16 %x, i16 %y, i16 %shamt) {
 525 ; CHECK-LABEL: @unmasked_shlop_insufficient_mask_shift_amount(
 526 ; CHECK-NEXT:    [[SHM:%.*]] = and i16 [[SHAMT:%.*]], 15
 527 ; CHECK-NEXT:    [[MASKX:%.*]] = and i16 [[X:%.*]], 255
 528 ; CHECK-NEXT:    [[T4:%.*]] = sub nsw i16 8, [[SHM]]
 529 ; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[Y:%.*]], [[T4]]
 530 ; CHECK-NEXT:    [[T6:%.*]] = lshr i16 [[MASKX]], [[SHM]]
 531 ; CHECK-NEXT:    [[T7:%.*]] = or i16 [[T5]], [[T6]]
 532 ; CHECK-NEXT:    [[T8:%.*]] = trunc i16 [[T7]] to i8
 533 ; CHECK-NEXT:    ret i8 [[T8]]
 534 ;
     ; The shift amount is masked with 15, which still allows values above 7,
     ; the largest in-range amount for the 8-bit result. The expected output
     ; keeps the and/sub/shl/lshr/or/trunc sequence (the sub gains nsw); no
     ; funnel shift is formed.
 535   %shm = and i16 %shamt, 15
 536   %maskx = and i16 %x, 255
 537   %t4 = sub i16 8, %shm
 538   %t5 = shl i16 %y, %t4
 539   %t6 = lshr i16 %maskx, %shm
 540   %t7 = or i16 %t5, %t6
 541   %t8 = trunc i16 %t7 to i8
 542   ret i8 %t8
 543 }
 544
 545 define i8 @unmasked_shlop_masked_shift_amount(i16 %x, i16 %y, i16 %shamt) {
 546 ; CHECK-LABEL: @unmasked_shlop_masked_shift_amount(
 547 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i16 [[SHAMT:%.*]] to i8
 548 ; CHECK-NEXT:    [[TMP2:%.*]] = trunc i16 [[Y:%.*]] to i8
 549 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc i16 [[X:%.*]] to i8
 550 ; CHECK-NEXT:    [[T8:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP2]], i8 [[TMP3]], i8 [[TMP1]])
 551 ; CHECK-NEXT:    ret i8 [[T8]]
 552 ;
     ; Positive counterpart of the two tests above: the left-shifted operand
     ; %y is unmasked, but the shift amount is masked with 7. The expected
     ; output truncates %shamt, %y and %x to i8 and emits llvm.fshr.i8.
 553   %shm = and i16 %shamt, 7
 554   %maskx = and i16 %x, 255
 555   %t4 = sub i16 8, %shm
 556   %t5 = shl i16 %y, %t4
 557   %t6 = lshr i16 %maskx, %shm
 558   %t7 = or i16 %t5, %t6
 559   %t8 = trunc i16 %t7 to i8
 560   ret i8 %t8
 561 }
 562
 563 define i32 @test_rotl_and_neg(i32 %x, i32 %shamt) {
 564 ; CHECK-LABEL: @test_rotl_and_neg(
 565 ; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[SHAMT:%.*]])
 566 ; CHECK-NEXT:    ret i32 [[OR]]
 567 ;
     ; Rotate-left pattern: %x is shifted left by %shamt and right by
     ; (0 - %shamt) masked with 31, and both shifts use the same value.
     ; Expected output: llvm.fshl.i32(%x, %x, %shamt).
 568   %shl = shl i32 %x, %shamt
 569   %neg = sub i32 0, %shamt
 570   %and = and i32 %neg, 31
 571   %shr = lshr i32 %x, %and
 572   %or = or i32 %shl, %shr
 573   ret i32 %or
 574 }
 575
 576 define i32 @test_rotl_and_neg_commuted(i32 %x, i32 %shamt) {
 577 ; CHECK-LABEL: @test_rotl_and_neg_commuted(
 578 ; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[SHAMT:%.*]])
 579 ; CHECK-NEXT:    ret i32 [[OR]]
 580 ;
     ; Same rotate-left pattern as test_rotl_and_neg, but with the operands
     ; of the final 'or' swapped (%shr first). The expected output is the
     ; same llvm.fshl.i32(%x, %x, %shamt).
 581   %shl = shl i32 %x, %shamt
 582   %neg = sub i32 0, %shamt
 583   %and = and i32 %neg, 31
 584   %shr = lshr i32 %x, %and
 585   %or = or i32 %shr, %shl
 586   ret i32 %or
 587 }
 588
 589 define i32 @test_rotr_and_neg(i32 %x, i32 %shamt) {
 590 ; CHECK-LABEL: @test_rotr_and_neg(
 591 ; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[SHAMT:%.*]])
 592 ; CHECK-NEXT:    ret i32 [[OR]]
 593 ;
     ; Rotate-right pattern: %x is shifted right by %shamt and left by
     ; (0 - %shamt) masked with 31. Expected output:
     ; llvm.fshr.i32(%x, %x, %shamt).
 594   %shr = lshr i32 %x, %shamt
 595   %neg = sub i32 0, %shamt
 596   %and = and i32 %neg, 31
 597   %shl = shl i32 %x, %and
 598   %or = or i32 %shl, %shr
 599   ret i32 %or
 600 }
 601
 602 ; Negative tests
 603
 604 ; This fold only works for rotation patterns (both shifts must use the same value).
 605 define i32 @test_fshl_and_neg(i32 %x, i32 %y, i32 %shamt) {
 606 ; CHECK-LABEL: @test_fshl_and_neg(
 607 ; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[SHAMT:%.*]]
 608 ; CHECK-NEXT:    [[NEG:%.*]] = sub i32 0, [[SHAMT]]
 609 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[NEG]], 31
 610 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[Y:%.*]], [[AND]]
 611 ; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
 612 ; CHECK-NEXT:    ret i32 [[OR]]
 613 ;
     ; Same shift-amount computation as test_rotl_and_neg, but the two shifts
     ; use different values (%x and %y). The expected output is the
     ; unchanged instruction sequence; no funnel shift is formed.
 614   %shl = shl i32 %x, %shamt
 615   %neg = sub i32 0, %shamt
 616   %and = and i32 %neg, 31
 617   %shr = lshr i32 %y, %and
 618   %or = or i32 %shl, %shr
 619   ret i32 %or
 620 }
 621
 622 define i32 @test_rotl_and_neg_wrong_mask(i32 %x, i32 %shamt) {
 623 ; CHECK-LABEL: @test_rotl_and_neg_wrong_mask(
 624 ; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[SHAMT:%.*]]
 625 ; CHECK-NEXT:    [[NEG:%.*]] = sub i32 0, [[SHAMT]]
 626 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[NEG]], 15
 627 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X]], [[AND]]
 628 ; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
 629 ; CHECK-NEXT:    ret i32 [[OR]]
 630 ;
     ; Both shifts use %x, but the negated amount is masked with 15 instead
     ; of 31 (bit width minus one). The expected output is the unchanged
     ; instruction sequence; no funnel shift is formed.
 631   %shl = shl i32 %x, %shamt
 632   %neg = sub i32 0, %shamt
 633   %and = and i32 %neg, 15
 634   %shr = lshr i32 %x, %and
 635   %or = or i32 %shl, %shr
 636   ret i32 %or
 637 }