llvm/test/Transforms/InstCombine/vector-casts.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -instcombine -S | FileCheck %s
   3
   4 ; Can't get smaller than this.
   5
   6 define <2 x i1> @trunc(<2 x i64> %a) {
   7 ; CHECK-LABEL: @trunc(
   8 ; CHECK-NEXT:    [[T:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i1>
   9 ; CHECK-NEXT:    ret <2 x i1> [[T]]
  10 ;
     ; The input is a lone vector trunc; the CHECK lines expect it to be emitted unchanged.
  11   %t = trunc <2 x i64> %a to <2 x i1>
  12   ret <2 x i1> %t
  13 }
  14
  15 ; Masking the low bit and comparing it against zero is equivalent to a trunc to i1.
  16
  17 define <2 x i1> @and_cmp_is_trunc(<2 x i64> %a) {
  18 ; CHECK-LABEL: @and_cmp_is_trunc(
  19 ; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i1>
  20 ; CHECK-NEXT:    ret <2 x i1> [[R]]
  21 ;
     ; Input: 'and' with a splat of 1, then 'icmp ne' against zero.
     ; The CHECK lines expect both instructions to be replaced by one trunc to <2 x i1>.
  22   %t = and <2 x i64> %a, <i64 1, i64 1>
  23   %r = icmp ne <2 x i64> %t, zeroinitializer
  24   ret <2 x i1> %r
  25 }
  26
  27 ; This is still a trunc to i1 when the mask constant has an undef element.
  28
  29 define <2 x i1> @and_cmp_is_trunc_even_with_undef_elt(<2 x i64> %a) {
  30 ; CHECK-LABEL: @and_cmp_is_trunc_even_with_undef_elt(
  31 ; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i1>
  32 ; CHECK-NEXT:    ret <2 x i1> [[R]]
  33 ;
     ; Input: low-bit mask whose lane 0 is undef (lane 1 is 1), then 'icmp ne' against zero.
     ; The CHECK lines expect a single trunc to <2 x i1>.
  34   %t = and <2 x i64> %a, <i64 undef, i64 1>
  35   %r = icmp ne <2 x i64> %t, zeroinitializer
  36   ret <2 x i1> %r
  37 }
  38
  39 ; TODO: This could be just 1 instruction (trunc), but our undef matching is incomplete.
  40
  41 define <2 x i1> @and_cmp_is_trunc_even_with_undef_elts(<2 x i64> %a) {
  42 ; CHECK-LABEL: @and_cmp_is_trunc_even_with_undef_elts(
  43 ; CHECK-NEXT:    [[T:%.*]] = and <2 x i64> [[A:%.*]], <i64 undef, i64 1>
  44 ; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i64> [[T]], <i64 undef, i64 0>
  45 ; CHECK-NEXT:    ret <2 x i1> [[R]]
  46 ;
     ; Here lane 0 is undef in BOTH the mask constant and the compare constant.
     ; The CHECK lines record that the and + icmp pair is currently left as written.
  47   %t = and <2 x i64> %a, <i64 undef, i64 1>
  48   %r = icmp ne <2 x i64> %t, <i64 undef, i64 0>
  49   ret <2 x i1> %r
  50 }
  51
  52 ; The ashr turns into an lshr.
  53 define <2 x i64> @test2(<2 x i64> %a) {
  54 ; CHECK-LABEL: @test2(
  55 ; CHECK-NEXT:    [[B:%.*]] = lshr <2 x i64> [[A:%.*]], <i64 1, i64 1>
  56 ; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[B]], <i64 32767, i64 32767>
  57 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
  58 ;
     ; Input: mask with 65535, then ashr by 1. The mask clears the sign bit, so an
     ; arithmetic and a logical shift give the same result here.
     ; The CHECK lines expect lshr by 1 followed by a mask of 32767.
  59   %b = and <2 x i64> %a, <i64 65535, i64 65535>
  60   %t = ashr <2 x i64> %b, <i64 1, i64 1>
  61   ret <2 x i64> %t
  62 }
  63
  64 define <2 x i64> @test3(<4 x float> %a, <4 x float> %b) {
  65 ; CHECK-LABEL: @test3(
  66 ; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ord <4 x float> [[A:%.*]], [[B:%.*]]
  67 ; CHECK-NEXT:    [[AND:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
  68 ; CHECK-NEXT:    [[CONV:%.*]] = bitcast <4 x i32> [[AND]] to <2 x i64>
  69 ; CHECK-NEXT:    ret <2 x i64> [[CONV]]
  70 ;
     ; Input: two 'fcmp ord X, 0' results are sign-extended and and'ed, then bitcast.
     ; The CHECK lines expect a single 'fcmp ord %a, %b', one sext, and the bitcast.
  71   %cmp = fcmp ord <4 x float> %a, zeroinitializer
  72   %sext = sext <4 x i1> %cmp to <4 x i32>
  73   %cmp4 = fcmp ord <4 x float> %b, zeroinitializer
  74   %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
  75   %and = and <4 x i32> %sext, %sext5
  76   %conv = bitcast <4 x i32> %and to <2 x i64>
  77   ret <2 x i64> %conv
  78 }
  79
  80 define <2 x i64> @test4(<4 x float> %a, <4 x float> %b) {
  81 ; CHECK-LABEL: @test4(
  82 ; CHECK-NEXT:    [[TMP1:%.*]] = fcmp uno <4 x float> [[A:%.*]], [[B:%.*]]
  83 ; CHECK-NEXT:    [[OR:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
  84 ; CHECK-NEXT:    [[CONV:%.*]] = bitcast <4 x i32> [[OR]] to <2 x i64>
  85 ; CHECK-NEXT:    ret <2 x i64> [[CONV]]
  86 ;
     ; Input: two 'fcmp uno X, 0' results are sign-extended and or'ed, then bitcast.
     ; The CHECK lines expect a single 'fcmp uno %a, %b', one sext, and the bitcast.
  87   %cmp = fcmp uno <4 x float> %a, zeroinitializer
  88   %sext = sext <4 x i1> %cmp to <4 x i32>
  89   %cmp4 = fcmp uno <4 x float> %b, zeroinitializer
  90   %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
  91   %or = or <4 x i32> %sext, %sext5
  92   %conv = bitcast <4 x i32> %or to <2 x i64>
  93   ret <2 x i64> %conv
  94 }
  95
  96 ; rdar://7434900
  97 define <2 x i64> @test5(<4 x float> %a, <4 x float> %b) {
  98 ; CHECK-LABEL: @test5(
  99 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp ult <4 x float> [[A:%.*]], zeroinitializer
 100 ; CHECK-NEXT:    [[CMP4:%.*]] = fcmp ult <4 x float> [[B:%.*]], zeroinitializer
 101 ; CHECK-NEXT:    [[AND1:%.*]] = and <4 x i1> [[CMP]], [[CMP4]]
 102 ; CHECK-NEXT:    [[AND:%.*]] = sext <4 x i1> [[AND1]] to <4 x i32>
 103 ; CHECK-NEXT:    [[CONV:%.*]] = bitcast <4 x i32> [[AND]] to <2 x i64>
 104 ; CHECK-NEXT:    ret <2 x i64> [[CONV]]
 105 ;
     ; Input: two 'fcmp ult X, 0' results are each sign-extended, then and'ed as <4 x i32>.
     ; The CHECK lines expect both compares to stay, the 'and' to be done on <4 x i1>,
     ; and a single sext of the combined mask.
 106   %cmp = fcmp ult <4 x float> %a, zeroinitializer
 107   %sext = sext <4 x i1> %cmp to <4 x i32>
 108   %cmp4 = fcmp ult <4 x float> %b, zeroinitializer
 109   %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
 110   %and = and <4 x i32> %sext, %sext5
 111   %conv = bitcast <4 x i32> %and to <2 x i64>
 112   ret <2 x i64> %conv
 113 }
 114
 115 define <2 x i64> @test6(<4 x float> %a, <4 x float> %b) {
 116 ; CHECK-LABEL: @test6(
 117 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp ult <4 x float> [[A:%.*]], zeroinitializer
 118 ; CHECK-NEXT:    [[CMP4:%.*]] = fcmp ult <4 x float> [[B:%.*]], zeroinitializer
 119 ; CHECK-NEXT:    [[AND1:%.*]] = or <4 x i1> [[CMP]], [[CMP4]]
 120 ; CHECK-NEXT:    [[AND:%.*]] = sext <4 x i1> [[AND1]] to <4 x i32>
 121 ; CHECK-NEXT:    [[CONV:%.*]] = bitcast <4 x i32> [[AND]] to <2 x i64>
 122 ; CHECK-NEXT:    ret <2 x i64> [[CONV]]
 123 ;
     ; Input: two sign-extended 'fcmp ult X, 0' masks combined with 'or'.
     ; The CHECK lines expect the 'or' to be done on <4 x i1> followed by a single sext.
     ; Note: the combining value is named %and although the operation is 'or'; the
     ; autogenerated CHECK variable names (AND1/AND) follow that name.
 124   %cmp = fcmp ult <4 x float> %a, zeroinitializer
 125   %sext = sext <4 x i1> %cmp to <4 x i32>
 126   %cmp4 = fcmp ult <4 x float> %b, zeroinitializer
 127   %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
 128   %and = or <4 x i32> %sext, %sext5
 129   %conv = bitcast <4 x i32> %and to <2 x i64>
 130   ret <2 x i64> %conv
 131 }
 132
 133 define <2 x i64> @test7(<4 x float> %a, <4 x float> %b) {
 134 ; CHECK-LABEL: @test7(
 135 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp ult <4 x float> [[A:%.*]], zeroinitializer
 136 ; CHECK-NEXT:    [[CMP4:%.*]] = fcmp ult <4 x float> [[B:%.*]], zeroinitializer
 137 ; CHECK-NEXT:    [[AND1:%.*]] = xor <4 x i1> [[CMP]], [[CMP4]]
 138 ; CHECK-NEXT:    [[AND:%.*]] = sext <4 x i1> [[AND1]] to <4 x i32>
 139 ; CHECK-NEXT:    [[CONV:%.*]] = bitcast <4 x i32> [[AND]] to <2 x i64>
 140 ; CHECK-NEXT:    ret <2 x i64> [[CONV]]
 141 ;
     ; Input: two sign-extended 'fcmp ult X, 0' masks combined with 'xor'.
     ; The CHECK lines expect the 'xor' to be done on <4 x i1> followed by a single sext.
     ; Note: the combining value is named %and although the operation is 'xor'; the
     ; autogenerated CHECK variable names (AND1/AND) follow that name.
 142   %cmp = fcmp ult <4 x float> %a, zeroinitializer
 143   %sext = sext <4 x i1> %cmp to <4 x i32>
 144   %cmp4 = fcmp ult <4 x float> %b, zeroinitializer
 145   %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
 146   %and = xor <4 x i32> %sext, %sext5
 147   %conv = bitcast <4 x i32> %and to <2 x i64>
 148   ret <2 x i64> %conv
 149 }
 150
 151 define void @convert(<2 x i32>* %dst.addr, <2 x i64> %src) {
 152 ; CHECK-LABEL: @convert(
 153 ; CHECK-NEXT:    [[VAL:%.*]] = trunc <2 x i64> [[SRC:%.*]] to <2 x i32>
 154 ; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i32> [[VAL]], <i32 1, i32 1>
 155 ; CHECK-NEXT:    store <2 x i32> [[ADD]], <2 x i32>* [[DST_ADDR:%.*]], align 8
 156 ; CHECK-NEXT:    ret void
 157 ;
     ; Input: trunc to <2 x i32>, add a splat of 1, store through %dst.addr.
     ; The CHECK lines expect the same trunc/add/store sequence; the only visible
     ; difference is that the store is printed with an explicit 'align 8'.
 158   %val = trunc <2 x i64> %src to <2 x i32>
 159   %add = add <2 x i32> %val, <i32 1, i32 1>
 160   store <2 x i32> %add, <2 x i32>* %dst.addr
 161   ret void
 162 }
 163
 164 define <2 x i65> @foo(<2 x i64> %t) {
 165 ; CHECK-LABEL: @foo(
 166 ; CHECK-NEXT:    [[A_MASK:%.*]] = and <2 x i64> [[T:%.*]], <i64 4294967295, i64 4294967295>
 167 ; CHECK-NEXT:    [[B:%.*]] = zext <2 x i64> [[A_MASK]] to <2 x i65>
 168 ; CHECK-NEXT:    ret <2 x i65> [[B]]
 169 ;
     ; Input: trunc i64 -> i32, then zext i32 -> i65 (destination wider than the source).
     ; The CHECK lines expect the trunc to become a mask of the low 32 bits
     ; (4294967295) in i64, followed by one zext to i65.
 170   %a = trunc <2 x i64> %t to <2 x i32>
 171   %b = zext <2 x i32> %a to <2 x i65>
 172   ret <2 x i65> %b
 173 }
 174
 175 define <2 x i64> @bar(<2 x i65> %t) {
 176 ; CHECK-LABEL: @bar(
 177 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i64>
 178 ; CHECK-NEXT:    [[B:%.*]] = and <2 x i64> [[TMP1]], <i64 4294967295, i64 4294967295>
 179 ; CHECK-NEXT:    ret <2 x i64> [[B]]
 180 ;
     ; Input: trunc i65 -> i32, then zext i32 -> i64 (destination narrower than the source).
     ; The CHECK lines expect a trunc directly to i64 followed by a mask of the
     ; low 32 bits (4294967295).
 181   %a = trunc <2 x i65> %t to <2 x i32>
 182   %b = zext <2 x i32> %a to <2 x i64>
 183   ret <2 x i64> %b
 184 }
 185
 186 define <2 x i64> @bars(<2 x i65> %t) {
 187 ; CHECK-LABEL: @bars(
 188 ; CHECK-NEXT:    [[A:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i32>
 189 ; CHECK-NEXT:    [[B:%.*]] = sext <2 x i32> [[A]] to <2 x i64>
 190 ; CHECK-NEXT:    ret <2 x i64> [[B]]
 191 ;
     ; Signed counterpart of a trunc/zext pair: trunc i65 -> i32, then sext i32 -> i64.
     ; The CHECK lines expect the trunc + sext pair to be left unchanged.
 192   %a = trunc <2 x i65> %t to <2 x i32>
 193   %b = sext <2 x i32> %a to <2 x i64>
 194   ret <2 x i64> %b
 195 }
 196
 197 define <2 x i64> @quxs(<2 x i64> %t) {
 198 ; CHECK-LABEL: @quxs(
 199 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[T:%.*]], <i64 32, i64 32>
 200 ; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i64> [[TMP1]], <i64 32, i64 32>
 201 ; CHECK-NEXT:    ret <2 x i64> [[B]]
 202 ;
     ; Input: trunc i64 -> i32, then sext back to i64 (same source and destination type).
     ; The CHECK lines expect the casts to be replaced by shl 32 + ashr exact 32.
 203   %a = trunc <2 x i64> %t to <2 x i32>
 204   %b = sext <2 x i32> %a to <2 x i64>
 205   ret <2 x i64> %b
 206 }
 207
 208 define <2 x i64> @quxt(<2 x i64> %t) {
 209 ; CHECK-LABEL: @quxt(
 210 ; CHECK-NEXT:    [[A:%.*]] = shl <2 x i64> [[T:%.*]], <i64 32, i64 32>
 211 ; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i64> [[A]], <i64 32, i64 32>
 212 ; CHECK-NEXT:    ret <2 x i64> [[B]]
 213 ;
     ; Input: shl by 32 followed by ashr by 32, already in shift form.
     ; The CHECK lines expect the pair to stay as shifts, with 'exact' added to the ashr.
 214   %a = shl <2 x i64> %t, <i64 32, i64 32>
 215   %b = ashr <2 x i64> %a, <i64 32, i64 32>
 216   ret <2 x i64> %b
 217 }
 218
 219 define <2 x double> @fa(<2 x double> %t) {
 220 ; CHECK-LABEL: @fa(
 221 ; CHECK-NEXT:    [[A:%.*]] = fptrunc <2 x double> [[T:%.*]] to <2 x float>
 222 ; CHECK-NEXT:    [[B:%.*]] = fpext <2 x float> [[A]] to <2 x double>
 223 ; CHECK-NEXT:    ret <2 x double> [[B]]
 224 ;
     ; Input: fptrunc double -> float, then fpext back to double.
     ; The CHECK lines expect the round trip to be kept as written.
 225   %a = fptrunc <2 x double> %t to <2 x float>
 226   %b = fpext <2 x float> %a to <2 x double>
 227   ret <2 x double> %b
 228 }
 229
 230 define <2 x double> @fb(<2 x double> %t) {
 231 ; CHECK-LABEL: @fb(
 232 ; CHECK-NEXT:    [[A:%.*]] = fptoui <2 x double> [[T:%.*]] to <2 x i64>
 233 ; CHECK-NEXT:    [[B:%.*]] = uitofp <2 x i64> [[A]] to <2 x double>
 234 ; CHECK-NEXT:    ret <2 x double> [[B]]
 235 ;
     ; Input: fptoui double -> i64, then uitofp back to double.
     ; The CHECK lines expect the round trip to be kept as written.
 236   %a = fptoui <2 x double> %t to <2 x i64>
 237   %b = uitofp <2 x i64> %a to <2 x double>
 238   ret <2 x double> %b
 239 }
 240
 241 define <2 x double> @fc(<2 x double> %t) {
 242 ; CHECK-LABEL: @fc(
 243 ; CHECK-NEXT:    [[A:%.*]] = fptosi <2 x double> [[T:%.*]] to <2 x i64>
 244 ; CHECK-NEXT:    [[B:%.*]] = sitofp <2 x i64> [[A]] to <2 x double>
 245 ; CHECK-NEXT:    ret <2 x double> [[B]]
 246 ;
     ; Input: fptosi double -> i64, then sitofp back to double.
     ; The CHECK lines expect the round trip to be kept as written.
 247   %a = fptosi <2 x double> %t to <2 x i64>
 248   %b = sitofp <2 x i64> %a to <2 x double>
 249   ret <2 x double> %b
 250 }
 251
 252 ; PR9228
 253 define <4 x float> @f(i32 %a) {
 254 ; CHECK-LABEL: @f(
 255 ; CHECK-NEXT:    ret <4 x float> undef
 256 ;
     ; None of the values computed below feeds the return value (the function
     ; returns undef), and the CHECK lines expect only the 'ret' to remain.
     ; First, %a is splatted into all four lanes.
 257   %dim = insertelement <4 x i32> undef, i32 %a, i32 0
 258   %dim30 = insertelement <4 x i32> %dim, i32 %a, i32 1
 259   %dim31 = insertelement <4 x i32> %dim30, i32 %a, i32 2
 260   %dim32 = insertelement <4 x i32> %dim31, i32 %a, i32 3
 261
     ; Pointer-arithmetic "sizeof" idiom: GEP index 1 from null, ptrtoint, trunc to i32.
 262   %offset_ptr = getelementptr <4 x float>, <4 x float>* null, i32 1
 263   %offset_int = ptrtoint <4 x float>* %offset_ptr to i64
 264   %sizeof32 = trunc i64 %offset_int to i32
 265
     ; Splat the size value into all four lanes.
 266   %smearinsert33 = insertelement <4 x i32> undef, i32 %sizeof32, i32 0
 267   %smearinsert34 = insertelement <4 x i32> %smearinsert33, i32 %sizeof32, i32 1
 268   %smearinsert35 = insertelement <4 x i32> %smearinsert34, i32 %sizeof32, i32 2
 269   %smearinsert36 = insertelement <4 x i32> %smearinsert35, i32 %sizeof32, i32 3
 270
 271   %delta_scale = mul <4 x i32> %dim32, %smearinsert36
 272   %offset_delta = add <4 x i32> zeroinitializer, %delta_scale
 273
     ; Final add has an undef operand and its result is unused.
 274   %offset_varying_delta = add <4 x i32> %offset_delta, undef
 275
 276   ret <4 x float> undef
 277 }
 278
 279 define <8 x i32> @pr24458(<8 x float> %n) {
 280 ; CHECK-LABEL: @pr24458(
 281 ; CHECK-NEXT:    ret <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
 282 ;
     ; Input: 'fcmp une %n, 0' and 'fcmp ueq %n, 0' are sign-extended and or'ed.
     ; For every lane at least one of une/ueq is true, so the 'or' is all-ones;
     ; the CHECK lines expect the function to return a constant splat of -1.
 283   %notequal_b_load_.i = fcmp une <8 x float> %n, zeroinitializer
 284   %equal_a_load72_.i = fcmp ueq <8 x float> %n, zeroinitializer
 285   %notequal_b_load__to_boolvec.i = sext <8 x i1> %notequal_b_load_.i to <8 x i32>
 286   %equal_a_load72__to_boolvec.i = sext <8 x i1> %equal_a_load72_.i to <8 x i32>
 287   %wrong = or <8 x i32> %notequal_b_load__to_boolvec.i, %equal_a_load72__to_boolvec.i
 288   ret <8 x i32> %wrong
 289 }
 290
 291 ; Hoist a trunc to a scalar if we're inserting into an undef vector.
 292 ; trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index
 293
 294 define <3 x i16> @trunc_inselt_undef(i32 %x) {
 295 ; CHECK-LABEL: @trunc_inselt_undef(
 296 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
 297 ; CHECK-NEXT:    [[TRUNC:%.*]] = insertelement <3 x i16> undef, i16 [[TMP1]], i32 1
 298 ; CHECK-NEXT:    ret <3 x i16> [[TRUNC]]
 299 ;
     ; Input: insert scalar %x into an undef <3 x i32> at constant index 1, then
     ; trunc the whole vector. The CHECK lines expect a scalar trunc of %x followed
     ; by an insert into an undef <3 x i16> at the same index.
 300   %vec = insertelement <3 x i32> undef, i32 %x, i32 1
 301   %trunc = trunc <3 x i32> %vec to <3 x i16>
 302   ret <3 x i16> %trunc
 303 }
 304
 305 ; Hoist an fptrunc to a scalar if we're inserting into an undef vector.
 306 ; fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
 307
 308 define <2 x float> @fptrunc_inselt_undef(double %x, i32 %index) {
 309 ; CHECK-LABEL: @fptrunc_inselt_undef(
 310 ; CHECK-NEXT:    [[TMP1:%.*]] = fptrunc double [[X:%.*]] to float
 311 ; CHECK-NEXT:    [[TRUNC:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 [[INDEX:%.*]]
 312 ; CHECK-NEXT:    ret <2 x float> [[TRUNC]]
 313 ;
     ; Floating-point variant with a variable insert index; the undef base vector is
     ; spelled element-wise (<double undef, double undef>). The CHECK lines expect a
     ; scalar fptrunc of %x followed by an insert into an undef <2 x float> at %index.
 314   %vec = insertelement <2 x double> <double undef, double undef>, double %x, i32 %index
 315   %trunc = fptrunc <2 x double> %vec to <2 x float>
 316   ret <2 x float> %trunc
 317 }
 318
 319 ; TODO: Strengthen the backend, so we can have this canonicalization.
 320 ; Insert a scalar int into a constant vector and truncate:
 321 ; trunc (inselt C, X, Index) --> inselt C, (trunc X), Index
 322
 323 define <3 x i16> @trunc_inselt1(i32 %x) {
 324 ; CHECK-LABEL: @trunc_inselt1(
 325 ; CHECK-NEXT:    [[VEC:%.*]] = insertelement <3 x i32> <i32 3, i32 poison, i32 65536>, i32 [[X:%.*]], i32 1
 326 ; CHECK-NEXT:    [[TRUNC:%.*]] = trunc <3 x i32> [[VEC]] to <3 x i16>
 327 ; CHECK-NEXT:    ret <3 x i16> [[TRUNC]]
 328 ;
     ; The base vector is a non-undef constant, so the CHECK lines expect the
     ; insert-then-trunc order to be kept. The only expected change is in the
     ; constant: lane 1 (the lane overwritten by the insert) is printed as poison
     ; instead of -2.
 329   %vec = insertelement <3 x i32> <i32 3, i32 -2, i32 65536>, i32 %x, i32 1
 330   %trunc = trunc <3 x i32> %vec to <3 x i16>
 331   ret <3 x i16> %trunc
 332 }
 333
 334 ; TODO: Strengthen the backend, so we can have this canonicalization.
 335 ; Insert a scalar FP into a constant vector and FP truncate:
 336 ; fptrunc (inselt C, X, Index) --> inselt C, (fptrunc X), Index
 337
 338 define <2 x float> @fptrunc_inselt1(double %x, i32 %index) {
 339 ; CHECK-LABEL: @fptrunc_inselt1(
 340 ; CHECK-NEXT:    [[VEC:%.*]] = insertelement <2 x double> <double undef, double 3.000000e+00>, double [[X:%.*]], i32 [[INDEX:%.*]]
 341 ; CHECK-NEXT:    [[TRUNC:%.*]] = fptrunc <2 x double> [[VEC]] to <2 x float>
 342 ; CHECK-NEXT:    ret <2 x float> [[TRUNC]]
 343 ;
     ; The base vector is only partially undef (lane 1 is 3.0) and the index is
     ; variable. The CHECK lines expect the insert + fptrunc to be left as written.
 344   %vec = insertelement <2 x double> <double undef, double 3.0>, double %x, i32 %index
 345   %trunc = fptrunc <2 x double> %vec to <2 x float>
 346   ret <2 x float> %trunc
 347 }
 348
 349 ; TODO: Strengthen the backend, so we can have this canonicalization.
 350 ; Insert a scalar int constant into a vector and truncate:
 351 ; trunc (inselt X, C, Index) --> inselt (trunc X), C', Index
 352
 353 define <8 x i16> @trunc_inselt2(<8 x i32> %x, i32 %index) {
 354 ; CHECK-LABEL: @trunc_inselt2(
 355 ; CHECK-NEXT:    [[VEC:%.*]] = insertelement <8 x i32> [[X:%.*]], i32 1048576, i32 [[INDEX:%.*]]
 356 ; CHECK-NEXT:    [[TRUNC:%.*]] = trunc <8 x i32> [[VEC]] to <8 x i16>
 357 ; CHECK-NEXT:    ret <8 x i16> [[TRUNC]]
 358 ;
     ; Input: insert the constant 1048576 into variable vector %x at a variable
     ; index, then trunc. The CHECK lines expect both instructions to be left as written.
 359   %vec = insertelement <8 x i32> %x, i32 1048576, i32 %index
 360   %trunc = trunc <8 x i32> %vec to <8 x i16>
 361   ret <8 x i16> %trunc
 362 }
 363
 364 ; TODO: Strengthen the backend, so we can have this canonicalization.
 365 ; Insert a scalar FP constant into a vector and FP truncate:
 366 ; fptrunc (inselt X, C, Index) --> inselt (fptrunc X), C', Index
 367
 368 define <3 x float> @fptrunc_inselt2(<3 x double> %x) {
 369 ; CHECK-LABEL: @fptrunc_inselt2(
 370 ; CHECK-NEXT:    [[VEC:%.*]] = insertelement <3 x double> [[X:%.*]], double 4.000000e+00, i32 2
 371 ; CHECK-NEXT:    [[TRUNC:%.*]] = fptrunc <3 x double> [[VEC]] to <3 x float>
 372 ; CHECK-NEXT:    ret <3 x float> [[TRUNC]]
 373 ;
     ; Input: insert the constant 4.0 into variable vector %x at constant index 2,
     ; then fptrunc. The CHECK lines expect both instructions to be left as written.
 374   %vec = insertelement <3 x double> %x, double 4.0, i32 2
 375   %trunc = fptrunc <3 x double> %vec to <3 x float>
 376   ret <3 x float> %trunc
 377 }
 378
 379 ; Converting to a wide type might reduce instruction count,
 380 ; but we can not do that unless the backend can recover from
 381 ; the creation of a potentially illegal op (like a 64-bit vmul).
 382 ; PR40032 - https://bugs.llvm.org/show_bug.cgi?id=40032
 383
 384 define <2 x i64> @sext_less_casting_with_wideop(<2 x i64> %x, <2 x i64> %y) {
 385 ; CHECK-LABEL: @sext_less_casting_with_wideop(
 386 ; CHECK-NEXT:    [[XNARROW:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32>
 387 ; CHECK-NEXT:    [[YNARROW:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32>
 388 ; CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i32> [[XNARROW]], [[YNARROW]]
 389 ; CHECK-NEXT:    [[R:%.*]] = sext <2 x i32> [[MUL]] to <2 x i64>
 390 ; CHECK-NEXT:    ret <2 x i64> [[R]]
 391 ;
     ; Input: both i64 operands are truncated to i32, multiplied narrow, and the
     ; product is sign-extended back to i64. The CHECK lines expect all four
     ; instructions to be kept, i.e. the multiply stays at <2 x i32>.
 392   %xnarrow = trunc <2 x i64> %x to <2 x i32>
 393   %ynarrow = trunc <2 x i64> %y to <2 x i32>
 394   %mul = mul <2 x i32> %xnarrow, %ynarrow
 395   %r = sext <2 x i32> %mul to <2 x i64>
 396   ret <2 x i64> %r
 397 }
 398
 399 define <2 x i64> @zext_less_casting_with_wideop(<2 x i64> %x, <2 x i64> %y) {
 400 ; CHECK-LABEL: @zext_less_casting_with_wideop(
 401 ; CHECK-NEXT:    [[XNARROW:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32>
 402 ; CHECK-NEXT:    [[YNARROW:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32>
 403 ; CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i32> [[XNARROW]], [[YNARROW]]
 404 ; CHECK-NEXT:    [[R:%.*]] = zext <2 x i32> [[MUL]] to <2 x i64>
 405 ; CHECK-NEXT:    ret <2 x i64> [[R]]
 406 ;
     ; Input: both i64 operands are truncated to i32, multiplied narrow, and the
     ; product is zero-extended back to i64. The CHECK lines expect all four
     ; instructions to be kept, i.e. the multiply stays at <2 x i32>.
 407   %xnarrow = trunc <2 x i64> %x to <2 x i32>
 408   %ynarrow = trunc <2 x i64> %y to <2 x i32>
 409   %mul = mul <2 x i32> %xnarrow, %ynarrow
 410   %r = zext <2 x i32> %mul to <2 x i64>
 411   ret <2 x i64> %r
 412 }
 413
 414 define <4 x float> @sitofp_shuf(<4 x i32> %x) {
 415 ; CHECK-LABEL: @sitofp_shuf(
 416 ; CHECK-NEXT:    [[TMP1:%.*]] = sitofp <4 x i32> [[X:%.*]] to <4 x float>
 417 ; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
 418 ; CHECK-NEXT:    ret <4 x float> [[R]]
 419 ;
     ; Input: a one-source shuffle of %x followed by sitofp (i32 -> float, same
     ; element width and element count). The CHECK lines expect the cast to be
     ; applied to %x first and the same shuffle mask to be applied to the result.
     ; The placeholder shuffle operand is poison in the input but is printed as
     ; undef in the expected output.
 420   %s = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
 421   %r = sitofp <4 x i32> %s to <4 x float>
 422   ret <4 x float> %r
 423 }
 424
 425 define <3 x half> @uitofp_shuf(<3 x i16> %x) {
 426 ; CHECK-LABEL: @uitofp_shuf(
 427 ; CHECK-NEXT:    [[TMP1:%.*]] = uitofp <3 x i16> [[X:%.*]] to <3 x half>
 428 ; CHECK-NEXT:    [[R:%.*]] = shufflevector <3 x half> [[TMP1]], <3 x half> undef, <3 x i32> <i32 2, i32 undef, i32 0>
 429 ; CHECK-NEXT:    ret <3 x half> [[R]]
 430 ;
     ; Input: a one-source shuffle of %x followed by uitofp (i16 -> half, same
     ; element width and element count). The CHECK lines expect the cast to be
     ; applied to %x first and the same shuffle mask to be applied to the result.
 431   %s = shufflevector <3 x i16> %x, <3 x i16> poison, <3 x i32> <i32 2, i32 undef, i32 0>
 432   %r = uitofp <3 x i16> %s to <3 x half>
 433   ret <3 x half> %r
 434 }
 435
 436 define <4 x i64> @fptosi_shuf(<4 x double> %x) {
 437 ; CHECK-LABEL: @fptosi_shuf(
 438 ; CHECK-NEXT:    [[TMP1:%.*]] = fptosi <4 x double> [[X:%.*]] to <4 x i64>
 439 ; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> undef, <4 x i32> <i32 undef, i32 2, i32 3, i32 undef>
 440 ; CHECK-NEXT:    ret <4 x i64> [[R]]
 441 ;
     ; Input: a one-source shuffle of %x followed by fptosi (double -> i64, same
     ; element width and element count). The CHECK lines expect the cast to be
     ; applied to %x first and the same shuffle mask to be applied to the result.
 442   %s = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 undef, i32 2, i32 3, i32 undef>
 443   %r = fptosi <4 x double> %s to <4 x i64>
 444   ret <4 x i64> %r
 445 }
 446
 447 define <2 x i32> @fptoui_shuf(<2 x float> %x) {
 448 ; CHECK-LABEL: @fptoui_shuf(
 449 ; CHECK-NEXT:    [[TMP1:%.*]] = fptoui <2 x float> [[X:%.*]] to <2 x i32>
 450 ; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> <i32 1, i32 1>
 451 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 452 ;
     ; Input: a one-source shuffle of %x (mask <1, 1>) followed by fptoui
     ; (float -> i32, same element width and element count). The CHECK lines expect
     ; the cast to be applied to %x first and the same mask to be applied to the result.
 453   %s = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 1>
 454   %r = fptoui <2 x float> %s to <2 x i32>
 455   ret <2 x i32> %r
 456 }
 457
 458 ; negative test
 459 ; TODO: Should we reduce the width of the shuffle?
 460
 461 define <4 x half> @narrowing_sitofp_shuf(<4 x i32> %x) {
 462 ; CHECK-LABEL: @narrowing_sitofp_shuf(
 463 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
 464 ; CHECK-NEXT:    [[R:%.*]] = sitofp <4 x i32> [[S]] to <4 x half>
 465 ; CHECK-NEXT:    ret <4 x half> [[R]]
 466 ;
     ; The cast narrows the element type (i32 -> half). The CHECK lines expect the
     ; shuffle-then-cast order to be left as written.
 467   %s = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
 468   %r = sitofp <4 x i32> %s to <4 x half>
 469   ret <4 x half> %r
 470 }
 471
 472 ; negative test
 473
 474 define <4 x double> @widening_uitofp_shuf(<4 x i32> %x) {
 475 ; CHECK-LABEL: @widening_uitofp_shuf(
 476 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
 477 ; CHECK-NEXT:    [[R:%.*]] = uitofp <4 x i32> [[S]] to <4 x double>
 478 ; CHECK-NEXT:    ret <4 x double> [[R]]
 479 ;
     ; The cast widens the element type (i32 -> double). The CHECK lines expect the
     ; shuffle-then-cast order to be left as written.
 480   %s = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
 481   %r = uitofp <4 x i32> %s to <4 x double>
 482   ret <4 x double> %r
 483 }
 484
 485 ; negative test
 486
 487 define <3 x i64> @fptosi_narrowing_shuf(<4 x double> %x) {
 488 ; CHECK-LABEL: @fptosi_narrowing_shuf(
 489 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x double> [[X:%.*]], <4 x double> poison, <3 x i32> <i32 undef, i32 2, i32 3>
 490 ; CHECK-NEXT:    [[R:%.*]] = fptosi <3 x double> [[S]] to <3 x i64>
 491 ; CHECK-NEXT:    ret <3 x i64> [[R]]
 492 ;
     ; The shuffle reduces the element count (4 -> 3) before the cast. The CHECK
     ; lines expect the shuffle-then-cast order to be left as written.
 493   %s = shufflevector <4 x double> %x, <4 x double> poison, <3 x i32> <i32 undef, i32 2, i32 3>
 494   %r = fptosi <3 x double> %s to <3 x i64>
 495   ret <3 x i64> %r
 496 }
 497
 498 ; negative test
 499 ; TODO: Should we reduce the width of the cast?
 500
 501 define <3 x i32> @fptoui_widening_shuf(<2 x float> %x) {
 502 ; CHECK-LABEL: @fptoui_widening_shuf(
 503 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <3 x i32> <i32 1, i32 1, i32 0>
 504 ; CHECK-NEXT:    [[R:%.*]] = fptoui <3 x float> [[S]] to <3 x i32>
 505 ; CHECK-NEXT:    ret <3 x i32> [[R]]
 506 ;
     ; The shuffle increases the element count (2 -> 3) before the cast. The CHECK
     ; lines expect the shuffle-then-cast order to be left as written.
 507   %s = shufflevector <2 x float> %x, <2 x float> poison, <3 x i32> <i32 1, i32 1, i32 0>
 508   %r = fptoui <3 x float> %s to <3 x i32>
 509   ret <3 x i32> %r
 510 }
 511
 512 ; negative test
 513 ; TODO: Should we reduce the width of the cast?
 514
 515 define <4 x half> @narrowing_sitofp_widening_shuf(<2 x i32> %x) {
 516 ; CHECK-LABEL: @narrowing_sitofp_widening_shuf(
 517 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 undef>
 518 ; CHECK-NEXT:    [[R:%.*]] = sitofp <4 x i32> [[S]] to <4 x half>
 519 ; CHECK-NEXT:    ret <4 x half> [[R]]
 520 ;
     ; The shuffle increases the element count (2 -> 4) and the cast narrows the
     ; element type (i32 -> half). The CHECK lines expect the shuffle-then-cast
     ; order to be left as written.
 521   %s = shufflevector <2 x i32> %x, <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 undef>
 522   %r = sitofp <4 x i32> %s to <4 x half>
 523   ret <4 x half> %r
 524 }
 525
 526 declare void @use(<4 x i32>) ; external function; calling it gives a <4 x i32> value an extra use
 527
 528 ; negative test
 529
 530 define <4 x float> @sitofp_shuf_extra_use(<4 x i32> %x) {
 531 ; CHECK-LABEL: @sitofp_shuf_extra_use(
 532 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
 533 ; CHECK-NEXT:    call void @use(<4 x i32> [[S]])
 534 ; CHECK-NEXT:    [[R:%.*]] = sitofp <4 x i32> [[S]] to <4 x float>
 535 ; CHECK-NEXT:    ret <4 x float> [[R]]
 536 ;
     ; The shuffle result %s has a second user (the call to @use) besides the cast.
     ; The CHECK lines expect the shuffle, the call, and the cast to be left as written.
 537   %s = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
 538   call void @use(<4 x i32> %s)
 539   %r = sitofp <4 x i32> %s to <4 x float>
 540   ret <4 x float> %r
 541 }
 542
 543 ; negative test
 544 ; TODO: Allow scalable vectors?
 545
 546 define <vscale x 4 x float> @sitofp_shuf_scalable(<vscale x 4 x i32> %x) {
 547 ; CHECK-LABEL: @sitofp_shuf_scalable(
 548 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <vscale x 4 x i32> [[X:%.*]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 549 ; CHECK-NEXT:    [[R:%.*]] = sitofp <vscale x 4 x i32> [[S]] to <vscale x 4 x float>
 550 ; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
 551 ;
     ; Same shuffle-then-sitofp shape, but on scalable (<vscale x 4 x ...>) vectors
     ; with a zeroinitializer mask. The CHECK lines expect the shuffle-then-cast
     ; order to be left as written.
 552   %s = shufflevector <vscale x 4 x i32> %x, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 553   %r = sitofp <vscale x 4 x i32> %s to <vscale x 4 x float>
 554   ret <vscale x 4 x float> %r
 555 }
 555 }