llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -instcombine -S < %s | FileCheck %s
   3
   4 ; --------------------------------------------------------------------
   5 ; llvm.amdgcn.rcp
   6 ; --------------------------------------------------------------------
   7
   8 declare float @llvm.amdgcn.rcp.f32(float) nounwind readnone
   9 declare double @llvm.amdgcn.rcp.f64(double) nounwind readnone
  10
  11 define float @test_constant_fold_rcp_f32_undef() nounwind {
     ; rcp of an undef f32 operand is expected to fold to the NaN constant 0x7FF8000000000000.
  12 ; CHECK-LABEL: @test_constant_fold_rcp_f32_undef(
  13 ; CHECK-NEXT:    ret float 0x7FF8000000000000
  14 ;
  15   %val = call float @llvm.amdgcn.rcp.f32(float undef) nounwind readnone
  16   ret float %val
  17 }
  18
  19 define float @test_constant_fold_rcp_f32_1() nounwind {
     ; f32 rcp of the constant 1.0 is expected to fold to 1.0.
  20 ; CHECK-LABEL: @test_constant_fold_rcp_f32_1(
  21 ; CHECK-NEXT:    ret float 1.000000e+00
  22 ;
  23   %val = call float @llvm.amdgcn.rcp.f32(float 1.0) nounwind readnone
  24   ret float %val
  25 }
  26
  27 define double @test_constant_fold_rcp_f64_1() nounwind {
     ; f64 rcp of the constant 1.0 is expected to fold to 1.0.
  28 ; CHECK-LABEL: @test_constant_fold_rcp_f64_1(
  29 ; CHECK-NEXT:    ret double 1.000000e+00
  30 ;
  31   %val = call double @llvm.amdgcn.rcp.f64(double 1.0) nounwind readnone
  32   ret double %val
  33 }
  34
  35 define float @test_constant_fold_rcp_f32_half() nounwind {
     ; f32 rcp of the constant 0.5 is expected to fold to 2.0.
  36 ; CHECK-LABEL: @test_constant_fold_rcp_f32_half(
  37 ; CHECK-NEXT:    ret float 2.000000e+00
  38 ;
  39   %val = call float @llvm.amdgcn.rcp.f32(float 0.5) nounwind readnone
  40   ret float %val
  41 }
  42
  43 define double @test_constant_fold_rcp_f64_half() nounwind {
     ; f64 rcp of the constant 0.5 is expected to fold to 2.0.
  44 ; CHECK-LABEL: @test_constant_fold_rcp_f64_half(
  45 ; CHECK-NEXT:    ret double 2.000000e+00
  46 ;
  47   %val = call double @llvm.amdgcn.rcp.f64(double 0.5) nounwind readnone
  48   ret double %val
  49 }
  50
  51 define float @test_constant_fold_rcp_f32_43() nounwind {
     ; f32 rcp of the constant 43.0 is expected to fold to the constant 0x3F97D05F40000000.
  52 ; CHECK-LABEL: @test_constant_fold_rcp_f32_43(
  53 ; CHECK-NEXT:    ret float 0x3F97D05F40000000
  54 ;
  55   %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) nounwind readnone
  56   ret float %val
  57 }
  58
  59 define double @test_constant_fold_rcp_f64_43() nounwind {
     ; f64 rcp of the constant 43.0 is expected to fold to the constant 0x3F97D05F417D05F4.
  60 ; CHECK-LABEL: @test_constant_fold_rcp_f64_43(
  61 ; CHECK-NEXT:    ret double 0x3F97D05F417D05F4
  62 ;
  63   %val = call double @llvm.amdgcn.rcp.f64(double 4.300000e+01) nounwind readnone
  64   ret double %val
  65 }
  66
  67 define float @test_constant_fold_rcp_f32_43_strictfp() nounwind strictfp {
     ; With strictfp on both the function and the call, the rcp call on 43.0 is expected to remain (no constant fold).
  68 ; CHECK-LABEL: @test_constant_fold_rcp_f32_43_strictfp(
  69 ; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) [[ATTR11:#.*]]
  70 ; CHECK-NEXT:    ret float [[VAL]]
  71 ;
  72   %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) strictfp nounwind readnone
  73   ret float %val
  74 }
  75
  76 ; --------------------------------------------------------------------
  77 ; llvm.amdgcn.rsq
  78 ; --------------------------------------------------------------------
  79
  80 declare float @llvm.amdgcn.rsq.f32(float) nounwind readnone
  81
  82 define float @test_constant_fold_rsq_f32_undef() nounwind {
     ; rsq of an undef f32 operand is expected to fold to the NaN constant 0x7FF8000000000000.
  83 ; CHECK-LABEL: @test_constant_fold_rsq_f32_undef(
  84 ; CHECK-NEXT:    ret float 0x7FF8000000000000
  85 ;
  86   %val = call float @llvm.amdgcn.rsq.f32(float undef) nounwind readnone
  87   ret float %val
  88 }
  89
  90 ; --------------------------------------------------------------------
  91 ; llvm.amdgcn.frexp.mant
  92 ; --------------------------------------------------------------------
  93
  94 declare float @llvm.amdgcn.frexp.mant.f32(float) nounwind readnone
  95 declare double @llvm.amdgcn.frexp.mant.f64(double) nounwind readnone
  96
  97
  98 define float @test_constant_fold_frexp_mant_f32_undef() nounwind {
     ; f32 frexp.mant of undef is expected to fold to undef.
  99 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_undef(
 100 ; CHECK-NEXT:    ret float undef
 101 ;
 102   %val = call float @llvm.amdgcn.frexp.mant.f32(float undef)
 103   ret float %val
 104 }
 105
 106 define double @test_constant_fold_frexp_mant_f64_undef() nounwind {
     ; f64 frexp.mant of undef is expected to fold to undef.
 107 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_undef(
 108 ; CHECK-NEXT:    ret double undef
 109 ;
 110   %val = call double @llvm.amdgcn.frexp.mant.f64(double undef)
 111   ret double %val
 112 }
 113
 114 define float @test_constant_fold_frexp_mant_f32_0() nounwind {
     ; f32 frexp.mant of 0.0 is expected to fold to 0.0.
 115 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_0(
 116 ; CHECK-NEXT:    ret float 0.000000e+00
 117 ;
 118   %val = call float @llvm.amdgcn.frexp.mant.f32(float 0.0)
 119   ret float %val
 120 }
 121
 122 define double @test_constant_fold_frexp_mant_f64_0() nounwind {
     ; f64 frexp.mant of 0.0 is expected to fold to 0.0.
 123 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_0(
 124 ; CHECK-NEXT:    ret double 0.000000e+00
 125 ;
 126   %val = call double @llvm.amdgcn.frexp.mant.f64(double 0.0)
 127   ret double %val
 128 }
 129
 130 define float @test_constant_fold_frexp_mant_f32_n0() nounwind {
     ; f32 frexp.mant of -0.0 is expected to fold to -0.0 (the sign is kept).
 131 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_n0(
 132 ; CHECK-NEXT:    ret float -0.000000e+00
 133 ;
 134   %val = call float @llvm.amdgcn.frexp.mant.f32(float -0.0)
 135   ret float %val
 136 }
 137
 138 define double @test_constant_fold_frexp_mant_f64_n0() nounwind {
     ; f64 frexp.mant of -0.0 is expected to fold to -0.0 (the sign is kept).
 139 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_n0(
 140 ; CHECK-NEXT:    ret double -0.000000e+00
 141 ;
 142   %val = call double @llvm.amdgcn.frexp.mant.f64(double -0.0)
 143   ret double %val
 144 }
 145
 146 define float @test_constant_fold_frexp_mant_f32_1() nounwind {
     ; f32 frexp.mant of 1.0 is expected to fold to 0.5.
 147 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_1(
 148 ; CHECK-NEXT:    ret float 5.000000e-01
 149 ;
 150   %val = call float @llvm.amdgcn.frexp.mant.f32(float 1.0)
 151   ret float %val
 152 }
 153
 154 define double @test_constant_fold_frexp_mant_f64_1() nounwind {
     ; f64 frexp.mant of 1.0 is expected to fold to 0.5.
 155 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_1(
 156 ; CHECK-NEXT:    ret double 5.000000e-01
 157 ;
 158   %val = call double @llvm.amdgcn.frexp.mant.f64(double 1.0)
 159   ret double %val
 160 }
 161
 162 define float @test_constant_fold_frexp_mant_f32_n1() nounwind {
     ; f32 frexp.mant of -1.0 is expected to fold to -0.5.
 163 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_n1(
 164 ; CHECK-NEXT:    ret float -5.000000e-01
 165 ;
 166   %val = call float @llvm.amdgcn.frexp.mant.f32(float -1.0)
 167   ret float %val
 168 }
 169
 170 define double @test_constant_fold_frexp_mant_f64_n1() nounwind {
     ; f64 frexp.mant of -1.0 is expected to fold to -0.5.
 171 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_n1(
 172 ; CHECK-NEXT:    ret double -5.000000e-01
 173 ;
 174   %val = call double @llvm.amdgcn.frexp.mant.f64(double -1.0)
 175   ret double %val
 176 }
 177
 178 define float @test_constant_fold_frexp_mant_f32_nan() nounwind {
     ; f32 frexp.mant of the NaN constant 0x7FF8000000000000 is expected to fold to that same constant.
 179 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_nan(
 180 ; CHECK-NEXT:    ret float 0x7FF8000000000000
 181 ;
 182   %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x7FF8000000000000)
 183   ret float %val
 184 }
 185
 186 define double @test_constant_fold_frexp_mant_f64_nan() nounwind {
     ; f64 frexp.mant of the NaN constant 0x7FF8000000000000 is expected to fold to that same constant.
 187 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_nan(
 188 ; CHECK-NEXT:    ret double 0x7FF8000000000000
 189 ;
 190   %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FF8000000000000)
 191   ret double %val
 192 }
 193
 194 define float @test_constant_fold_frexp_mant_f32_inf() nounwind {
     ; f32 frexp.mant of the 'inf' constant 0x7FF0000000000000 is expected to fold to that same constant.
 195 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_inf(
 196 ; CHECK-NEXT:    ret float 0x7FF0000000000000
 197 ;
 198   %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x7FF0000000000000)
 199   ret float %val
 200 }
 201
 202 define double @test_constant_fold_frexp_mant_f64_inf() nounwind {
     ; f64 frexp.mant of the 'inf' constant 0x7FF0000000000000 is expected to fold to that same constant.
 203 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_inf(
 204 ; CHECK-NEXT:    ret double 0x7FF0000000000000
 205 ;
 206   %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FF0000000000000)
 207   ret double %val
 208 }
 209
 210 define float @test_constant_fold_frexp_mant_f32_ninf() nounwind {
     ; f32 frexp.mant of the 'ninf' constant 0xFFF0000000000000 is expected to fold to that same constant.
 211 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_ninf(
 212 ; CHECK-NEXT:    ret float 0xFFF0000000000000
 213 ;
 214   %val = call float @llvm.amdgcn.frexp.mant.f32(float 0xFFF0000000000000)
 215   ret float %val
 216 }
 217
 218 define double @test_constant_fold_frexp_mant_f64_ninf() nounwind {
     ; f64 frexp.mant of the 'ninf' constant 0xFFF0000000000000 is expected to fold to that same constant.
 219 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_ninf(
 220 ; CHECK-NEXT:    ret double 0xFFF0000000000000
 221 ;
 222   %val = call double @llvm.amdgcn.frexp.mant.f64(double 0xFFF0000000000000)
 223   ret double %val
 224 }
 225
 226 define float @test_constant_fold_frexp_mant_f32_max_num() nounwind {
     ; f32 frexp.mant of the 'max_num' input 0x47EFFFFFE0000000 is expected to fold to 0x3FEFFFFFE0000000.
 227 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_max_num(
 228 ; CHECK-NEXT:    ret float 0x3FEFFFFFE0000000
 229 ;
 230   %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x47EFFFFFE0000000)
 231   ret float %val
 232 }
 233
 234 define double @test_constant_fold_frexp_mant_f64_max_num() nounwind {
     ; f64 frexp.mant of the 'max_num' input 0x7FEFFFFFFFFFFFFF is expected to fold to 0x3FEFFFFFFFFFFFFF.
 235 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_max_num(
 236 ; CHECK-NEXT:    ret double 0x3FEFFFFFFFFFFFFF
 237 ;
 238   %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FEFFFFFFFFFFFFF)
 239   ret double %val
 240 }
 241
 242 define float @test_constant_fold_frexp_mant_f32_min_num() nounwind {
     ; f32 frexp.mant of the 'min_num' input 0x36A0000000000000 is expected to fold to 0.5.
 243 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_min_num(
 244 ; CHECK-NEXT:    ret float 5.000000e-01
 245 ;
 246   %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x36A0000000000000)
 247   ret float %val
 248 }
 249
 250 define double @test_constant_fold_frexp_mant_f64_min_num() nounwind {
     ; f64 frexp.mant of the 'min_num' input 4.940656e-324 is expected to fold to 0.5.
 251 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_min_num(
 252 ; CHECK-NEXT:    ret double 5.000000e-01
 253 ;
 254   %val = call double @llvm.amdgcn.frexp.mant.f64(double 4.940656e-324)
 255   ret double %val
 256 }
 257
 258
 259 ; --------------------------------------------------------------------
 260 ; llvm.amdgcn.frexp.exp
 261 ; --------------------------------------------------------------------
 262
 263 declare i32 @llvm.amdgcn.frexp.exp.f32(float) nounwind readnone
 264 declare i32 @llvm.amdgcn.frexp.exp.f64(double) nounwind readnone
 265
 266 define i32 @test_constant_fold_frexp_exp_f32_undef() nounwind {
     ; frexp.exp of an undef f32 is expected to fold to undef.
 267 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_undef(
 268 ; CHECK-NEXT:    ret i32 undef
 269 ;
 270   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float undef)
 271   ret i32 %val
 272 }
 273
 274 define i32 @test_constant_fold_frexp_exp_f64_undef() nounwind {
     ; frexp.exp of an undef f64 is expected to fold to undef.
 275 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_undef(
 276 ; CHECK-NEXT:    ret i32 undef
 277 ;
 278   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double undef)
 279   ret i32 %val
 280 }
 281
 282 define i32 @test_constant_fold_frexp_exp_f32_0() nounwind {
     ; frexp.exp of f32 0.0 is expected to fold to 0.
 283 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_0(
 284 ; CHECK-NEXT:    ret i32 0
 285 ;
 286   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0.0)
 287   ret i32 %val
 288 }
 289
 290 define i32 @test_constant_fold_frexp_exp_f64_0() nounwind {
     ; frexp.exp of f64 0.0 is expected to fold to 0.
 291 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_0(
 292 ; CHECK-NEXT:    ret i32 0
 293 ;
 294   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0.0)
 295   ret i32 %val
 296 }
 297
 298 define i32 @test_constant_fold_frexp_exp_f32_n0() nounwind {
     ; frexp.exp of f32 -0.0 is expected to fold to 0.
 299 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_n0(
 300 ; CHECK-NEXT:    ret i32 0
 301 ;
 302   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float -0.0)
 303   ret i32 %val
 304 }
 305
 306 define i32 @test_constant_fold_frexp_exp_f64_n0() nounwind {
     ; frexp.exp of f64 -0.0 is expected to fold to 0.
 307 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_n0(
 308 ; CHECK-NEXT:    ret i32 0
 309 ;
 310   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double -0.0)
 311   ret i32 %val
 312 }
 313
 314 define i32 @test_constant_fold_frexp_exp_f32_1024() nounwind {
     ; frexp.exp of f32 1024.0 is expected to fold to 11.
 315 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_1024(
 316 ; CHECK-NEXT:    ret i32 11
 317 ;
 318   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 1024.0)
 319   ret i32 %val
 320 }
 321
 322 define i32 @test_constant_fold_frexp_exp_f64_1024() nounwind {
     ; frexp.exp of f64 1024.0 is expected to fold to 11.
 323 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_1024(
 324 ; CHECK-NEXT:    ret i32 11
 325 ;
 326   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 1024.0)
 327   ret i32 %val
 328 }
 329
 330 define i32 @test_constant_fold_frexp_exp_f32_n1024() nounwind {
     ; frexp.exp of f32 -1024.0 is expected to fold to 11, the same result as for +1024.0.
 331 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_n1024(
 332 ; CHECK-NEXT:    ret i32 11
 333 ;
 334   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float -1024.0)
 335   ret i32 %val
 336 }
 337
 338 define i32 @test_constant_fold_frexp_exp_f64_n1024() nounwind {
     ; frexp.exp of f64 -1024.0 is expected to fold to 11, the same result as for +1024.0.
 339 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_n1024(
 340 ; CHECK-NEXT:    ret i32 11
 341 ;
 342   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double -1024.0)
 343   ret i32 %val
 344 }
 345
 346 define i32 @test_constant_fold_frexp_exp_f32_1_1024() nounwind {
     ; frexp.exp of f32 0.0009765625 is expected to fold to -9.
 347 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_1_1024(
 348 ; CHECK-NEXT:    ret i32 -9
 349 ;
 350   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0.0009765625)
 351   ret i32 %val
 352 }
 353
 354 define i32 @test_constant_fold_frexp_exp_f64_1_1024() nounwind {
     ; frexp.exp of f64 0.0009765625 is expected to fold to -9.
 355 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_1_1024(
 356 ; CHECK-NEXT:    ret i32 -9
 357 ;
 358   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0.0009765625)
 359   ret i32 %val
 360 }
 361
 362 define i32 @test_constant_fold_frexp_exp_f32_nan() nounwind {
     ; frexp.exp of the f32 NaN constant 0x7FF8000000000000 is expected to fold to 0.
 363 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_nan(
 364 ; CHECK-NEXT:    ret i32 0
 365 ;
 366   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x7FF8000000000000)
 367   ret i32 %val
 368 }
 369
 370 define i32 @test_constant_fold_frexp_exp_f64_nan() nounwind {
     ; frexp.exp of the f64 NaN constant 0x7FF8000000000000 is expected to fold to 0.
 371 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_nan(
 372 ; CHECK-NEXT:    ret i32 0
 373 ;
 374   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FF8000000000000)
 375   ret i32 %val
 376 }
 377
 378 define i32 @test_constant_fold_frexp_exp_f32_inf() nounwind {
     ; frexp.exp of the f32 'inf' constant 0x7FF0000000000000 is expected to fold to 0.
 379 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_inf(
 380 ; CHECK-NEXT:    ret i32 0
 381 ;
 382   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x7FF0000000000000)
 383   ret i32 %val
 384 }
 385
 386 define i32 @test_constant_fold_frexp_exp_f64_inf() nounwind {
     ; frexp.exp of the f64 'inf' constant 0x7FF0000000000000 is expected to fold to 0.
 387 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_inf(
 388 ; CHECK-NEXT:    ret i32 0
 389 ;
 390   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FF0000000000000)
 391   ret i32 %val
 392 }
 393
 394 define i32 @test_constant_fold_frexp_exp_f32_ninf() nounwind {
     ; frexp.exp of the f32 'ninf' constant 0xFFF0000000000000 is expected to fold to 0.
 395 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_ninf(
 396 ; CHECK-NEXT:    ret i32 0
 397 ;
 398   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0xFFF0000000000000)
 399   ret i32 %val
 400 }
 401
 402 define i32 @test_constant_fold_frexp_exp_f64_ninf() nounwind {
     ; frexp.exp of the f64 'ninf' constant 0xFFF0000000000000 is expected to fold to 0.
 403 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_ninf(
 404 ; CHECK-NEXT:    ret i32 0
 405 ;
 406   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0xFFF0000000000000)
 407   ret i32 %val
 408 }
 409
 410 define i32 @test_constant_fold_frexp_exp_f32_max_num() nounwind {
     ; frexp.exp of the f32 'max_num' input 0x47EFFFFFE0000000 is expected to fold to 128.
 411 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_max_num(
 412 ; CHECK-NEXT:    ret i32 128
 413 ;
 414   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x47EFFFFFE0000000)
 415   ret i32 %val
 416 }
 417
 418 define i32 @test_constant_fold_frexp_exp_f64_max_num() nounwind {
     ; frexp.exp of the f64 'max_num' input 0x7FEFFFFFFFFFFFFF is expected to fold to 1024.
 419 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_max_num(
 420 ; CHECK-NEXT:    ret i32 1024
 421 ;
 422   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FEFFFFFFFFFFFFF)
 423   ret i32 %val
 424 }
 425
 426 define i32 @test_constant_fold_frexp_exp_f32_min_num() nounwind {
     ; frexp.exp of the f32 'min_num' input 0x36A0000000000000 is expected to fold to -148.
 427 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_min_num(
 428 ; CHECK-NEXT:    ret i32 -148
 429 ;
 430   %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x36A0000000000000)
 431   ret i32 %val
 432 }
 433
 434 define i32 @test_constant_fold_frexp_exp_f64_min_num() nounwind {
     ; frexp.exp of the f64 'min_num' input 4.940656e-324 is expected to fold to -1073.
 435 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_min_num(
 436 ; CHECK-NEXT:    ret i32 -1073
 437 ;
 438   %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 4.940656e-324)
 439   ret i32 %val
 440 }
 441
 442 ; --------------------------------------------------------------------
 443 ; llvm.amdgcn.class
 444 ; --------------------------------------------------------------------
 445
 446 declare i1 @llvm.amdgcn.class.f32(float, i32) nounwind readnone
 447 declare i1 @llvm.amdgcn.class.f64(double, i32) nounwind readnone
 448
 449 define i1 @test_class_undef_mask_f32(float %x) nounwind {
     ; class of a variable operand with an undef mask is expected to fold to false.
 450 ; CHECK-LABEL: @test_class_undef_mask_f32(
 451 ; CHECK-NEXT:    ret i1 false
 452 ;
 453   %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 undef)
 454   ret i1 %val
 455 }
 456
 457 define i1 @test_class_over_max_mask_f32(float %x) nounwind {
     ; The mask constant 1025 is expected to be rewritten to 1 while the class call itself is kept.
 458 ; CHECK-LABEL: @test_class_over_max_mask_f32(
 459 ; CHECK-NEXT:    [[VAL:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[X:%.*]], i32 1)
 460 ; CHECK-NEXT:    ret i1 [[VAL]]
 461 ;
 462   %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1025)
 463   ret i1 %val
 464 }
 465
 466 define i1 @test_class_no_mask_f32(float %x) nounwind {
     ; class of a variable operand with mask 0 is expected to fold to false.
 467 ; CHECK-LABEL: @test_class_no_mask_f32(
 468 ; CHECK-NEXT:    ret i1 false
 469 ;
 470   %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 0)
 471   ret i1 %val
 472 }
 473
 474 define i1 @test_class_full_mask_f32(float %x) nounwind {
     ; class of a variable operand with mask 1023 is expected to fold to true.
 475 ; CHECK-LABEL: @test_class_full_mask_f32(
 476 ; CHECK-NEXT:    ret i1 true
 477 ;
 478   %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1023)
 479   ret i1 %val
 480 }
 481
 482 define i1 @test_class_undef_no_mask_f32() nounwind {
     ; class of an undef operand with mask 0 is expected to fold to false.
 483 ; CHECK-LABEL: @test_class_undef_no_mask_f32(
 484 ; CHECK-NEXT:    ret i1 false
 485 ;
 486   %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 0)
 487   ret i1 %val
 488 }
 489
 490 define i1 @test_class_undef_full_mask_f32() nounwind {
     ; class of an undef operand with mask 1023 is expected to fold to true.
 491 ; CHECK-LABEL: @test_class_undef_full_mask_f32(
 492 ; CHECK-NEXT:    ret i1 true
 493 ;
 494   %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 1023)
 495   ret i1 %val
 496 }
 497
 498 define i1 @test_class_undef_val_f32() nounwind {
     ; class of an undef operand with the constant mask 4 is expected to fold to undef.
 499 ; CHECK-LABEL: @test_class_undef_val_f32(
 500 ; CHECK-NEXT:    ret i1 undef
 501 ;
 502   %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 4)
 503   ret i1 %val
 504 }
 505
 506 define i1 @test_class_undef_undef_f32() nounwind {
     ; class with both the operand and the mask undef is expected to fold to undef.
 507 ; CHECK-LABEL: @test_class_undef_undef_f32(
 508 ; CHECK-NEXT:    ret i1 undef
 509 ;
 510   %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 undef)
 511   ret i1 %val
 512 }
 513
 514 define i1 @test_class_var_mask_f32(float %x, i32 %mask) nounwind {
     ; With a non-constant mask the class call is expected to be left unchanged.
 515 ; CHECK-LABEL: @test_class_var_mask_f32(
 516 ; CHECK-NEXT:    [[VAL:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[X:%.*]], i32 [[MASK:%.*]])
 517 ; CHECK-NEXT:    ret i1 [[VAL]]
 518 ;
 519   %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 %mask)
 520   ret i1 %val
 521 }
 522
 523 define i1 @test_class_isnan_f32(float %x) nounwind {
     ; class with mask 3 is expected to be replaced by "fcmp uno %x, 0.0".
 524 ; CHECK-LABEL: @test_class_isnan_f32(
 525 ; CHECK-NEXT:    [[VAL:%.*]] = fcmp uno float [[X:%.*]], 0.000000e+00
 526 ; CHECK-NEXT:    ret i1 [[VAL]]
 527 ;
 528   %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 3)
 529   ret i1 %val
 530 }
 531
 532 define i1 @test_class_is_p0_n0_f32(float %x) nounwind {
     ; class with mask 96 is expected to be replaced by "fcmp oeq %x, 0.0".
 533 ; CHECK-LABEL: @test_class_is_p0_n0_f32(
 534 ; CHECK-NEXT:    [[VAL:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
 535 ; CHECK-NEXT:    ret i1 [[VAL]]
 536 ;
 537   %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 96)
 538   ret i1 %val
 539 }
 540
 541 define i1 @test_constant_class_snan_test_snan_f64() nounwind {
     ; The 'snan' constant 0x7FF0000000000001 tested against mask 1 is expected to fold to true.
 542 ; CHECK-LABEL: @test_constant_class_snan_test_snan_f64(
 543 ; CHECK-NEXT:    ret i1 true
 544 ;
 545   %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 1)
 546   ret i1 %val
 547 }
 548
 549 define i1 @test_constant_class_qnan_test_qnan_f64() nounwind {
     ; The 'qnan' constant 0x7FF8000000000000 tested against mask 2 is expected to fold to true.
 550 ; CHECK-LABEL: @test_constant_class_qnan_test_qnan_f64(
 551 ; CHECK-NEXT:    ret i1 true
 552 ;
 553   %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 2)
 554   ret i1 %val
 555 }
 556
 557 define i1 @test_constant_class_qnan_test_snan_f64() nounwind {
     ; The 'qnan' constant 0x7FF8000000000000 tested against mask 1 is expected to fold to false.
 558 ; CHECK-LABEL: @test_constant_class_qnan_test_snan_f64(
 559 ; CHECK-NEXT:    ret i1 false
 560 ;
 561   %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 1)
 562   ret i1 %val
 563 }
 564
 565 define i1 @test_constant_class_ninf_test_ninf_f64() nounwind {
     ; The 'ninf' constant 0xFFF0000000000000 tested against mask 4 is expected to fold to true.
 566 ; CHECK-LABEL: @test_constant_class_ninf_test_ninf_f64(
 567 ; CHECK-NEXT:    ret i1 true
 568 ;
 569   %val = call i1 @llvm.amdgcn.class.f64(double 0xFFF0000000000000, i32 4)
 570   ret i1 %val
 571 }
 572
 573 define i1 @test_constant_class_pinf_test_ninf_f64() nounwind {
     ; The 'pinf' constant 0x7FF0000000000000 tested against mask 4 is expected to fold to false.
 574 ; CHECK-LABEL: @test_constant_class_pinf_test_ninf_f64(
 575 ; CHECK-NEXT:    ret i1 false
 576 ;
 577   %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000000, i32 4)
 578   ret i1 %val
 579 }
 580
 581 define i1 @test_constant_class_qnan_test_ninf_f64() nounwind {
     ; The 'qnan' constant 0x7FF8000000000000 tested against mask 4 is expected to fold to false.
 582 ; CHECK-LABEL: @test_constant_class_qnan_test_ninf_f64(
 583 ; CHECK-NEXT:    ret i1 false
 584 ;
 585   %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 4)
 586   ret i1 %val
 587 }
 588
 589 define i1 @test_constant_class_snan_test_ninf_f64() nounwind {
     ; The 'snan' constant 0x7FF0000000000001 tested against mask 4 is expected to fold to false.
 590 ; CHECK-LABEL: @test_constant_class_snan_test_ninf_f64(
 591 ; CHECK-NEXT:    ret i1 false
 592 ;
 593   %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 4)
 594   ret i1 %val
 595 }
 596
 597 define i1 @test_constant_class_nnormal_test_nnormal_f64() nounwind {
     ; The constant -1.0 tested against mask 8 is expected to fold to true.
 598 ; CHECK-LABEL: @test_constant_class_nnormal_test_nnormal_f64(
 599 ; CHECK-NEXT:    ret i1 true
 600 ;
 601   %val = call i1 @llvm.amdgcn.class.f64(double -1.0, i32 8)
 602   ret i1 %val
 603 }
 604
 605 define i1 @test_constant_class_pnormal_test_nnormal_f64() nounwind {
     ; The constant 1.0 tested against mask 8 is expected to fold to false.
 606 ; CHECK-LABEL: @test_constant_class_pnormal_test_nnormal_f64(
 607 ; CHECK-NEXT:    ret i1 false
 608 ;
 609   %val = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 8)
 610   ret i1 %val
 611 }
 612
 613 define i1 @test_constant_class_nsubnormal_test_nsubnormal_f64() nounwind {
     ; The 'nsubnormal' constant 0x800fffffffffffff tested against mask 16 is expected to fold to true.
 614 ; CHECK-LABEL: @test_constant_class_nsubnormal_test_nsubnormal_f64(
 615 ; CHECK-NEXT:    ret i1 true
 616 ;
 617   %val = call i1 @llvm.amdgcn.class.f64(double 0x800fffffffffffff, i32 16)
 618   ret i1 %val
 619 }
 620
 621 define i1 @test_constant_class_psubnormal_test_nsubnormal_f64() nounwind {
     ; The 'psubnormal' constant 0x000fffffffffffff tested against mask 16 is expected to fold to false.
 622 ; CHECK-LABEL: @test_constant_class_psubnormal_test_nsubnormal_f64(
 623 ; CHECK-NEXT:    ret i1 false
 624 ;
 625   %val = call i1 @llvm.amdgcn.class.f64(double 0x000fffffffffffff, i32 16)
 626   ret i1 %val
 627 }
 628
 629 define i1 @test_constant_class_nzero_test_nzero_f64() nounwind {
     ; The constant -0.0 tested against mask 32 is expected to fold to true.
 630 ; CHECK-LABEL: @test_constant_class_nzero_test_nzero_f64(
 631 ; CHECK-NEXT:    ret i1 true
 632 ;
 633   %val = call i1 @llvm.amdgcn.class.f64(double -0.0, i32 32)
 634   ret i1 %val
 635 }
 636
 637 define i1 @test_constant_class_pzero_test_nzero_f64() nounwind {
     ; The constant 0.0 tested against mask 32 is expected to fold to false.
 638 ; CHECK-LABEL: @test_constant_class_pzero_test_nzero_f64(
 639 ; CHECK-NEXT:    ret i1 false
 640 ;
 641   %val = call i1 @llvm.amdgcn.class.f64(double 0.0, i32 32)
 642   ret i1 %val
 643 }
 644
 645 define i1 @test_constant_class_pzero_test_pzero_f64() nounwind {
     ; The constant 0.0 tested against mask 64 is expected to fold to true.
 646 ; CHECK-LABEL: @test_constant_class_pzero_test_pzero_f64(
 647 ; CHECK-NEXT:    ret i1 true
 648 ;
 649   %val = call i1 @llvm.amdgcn.class.f64(double 0.0, i32 64)
 650   ret i1 %val
 651 }
 652
 653 define i1 @test_constant_class_nzero_test_pzero_f64() nounwind {
     ; The constant -0.0 tested against mask 64 is expected to fold to false.
 654 ; CHECK-LABEL: @test_constant_class_nzero_test_pzero_f64(
 655 ; CHECK-NEXT:    ret i1 false
 656 ;
 657   %val = call i1 @llvm.amdgcn.class.f64(double -0.0, i32 64)
 658   ret i1 %val
 659 }
 660
 661 define i1 @test_constant_class_psubnormal_test_psubnormal_f64() nounwind {
     ; The 'psubnormal' constant 0x000fffffffffffff tested against mask 128 is expected to fold to true.
 662 ; CHECK-LABEL: @test_constant_class_psubnormal_test_psubnormal_f64(
 663 ; CHECK-NEXT:    ret i1 true
 664 ;
 665   %val = call i1 @llvm.amdgcn.class.f64(double 0x000fffffffffffff, i32 128)
 666   ret i1 %val
 667 }
 668
 669 define i1 @test_constant_class_nsubnormal_test_psubnormal_f64() nounwind {
     ; The 'nsubnormal' constant 0x800fffffffffffff tested against mask 128 is expected to fold to false.
 670 ; CHECK-LABEL: @test_constant_class_nsubnormal_test_psubnormal_f64(
 671 ; CHECK-NEXT:    ret i1 false
 672 ;
 673   %val = call i1 @llvm.amdgcn.class.f64(double 0x800fffffffffffff, i32 128)
 674   ret i1 %val
 675 }
 676
 677 define i1 @test_constant_class_pnormal_test_pnormal_f64() nounwind {
     ; The constant 1.0 tested against mask 256 is expected to fold to true.
 678 ; CHECK-LABEL: @test_constant_class_pnormal_test_pnormal_f64(
 679 ; CHECK-NEXT:    ret i1 true
 680 ;
 681   %val = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 256)
 682   ret i1 %val
 683 }
 684
 685 define i1 @test_constant_class_nnormal_test_pnormal_f64() nounwind {
     ; The constant -1.0 tested against mask 256 is expected to fold to false.
 686 ; CHECK-LABEL: @test_constant_class_nnormal_test_pnormal_f64(
 687 ; CHECK-NEXT:    ret i1 false
 688 ;
 689   %val = call i1 @llvm.amdgcn.class.f64(double -1.0, i32 256)
 690   ret i1 %val
 691 }
 692
 693 define i1 @test_constant_class_pinf_test_pinf_f64() nounwind {
     ; The 'pinf' constant 0x7FF0000000000000 tested against mask 512 is expected to fold to true.
 694 ; CHECK-LABEL: @test_constant_class_pinf_test_pinf_f64(
 695 ; CHECK-NEXT:    ret i1 true
 696 ;
 697   %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000000, i32 512)
 698   ret i1 %val
 699 }
 700
 701 define i1 @test_constant_class_ninf_test_pinf_f64() nounwind {
     ; The 'ninf' constant 0xFFF0000000000000 tested against mask 512 is expected to fold to false.
 702 ; CHECK-LABEL: @test_constant_class_ninf_test_pinf_f64(
 703 ; CHECK-NEXT:    ret i1 false
 704 ;
 705   %val = call i1 @llvm.amdgcn.class.f64(double 0xFFF0000000000000, i32 512)
 706   ret i1 %val
 707 }
 708
 709 define i1 @test_constant_class_qnan_test_pinf_f64() nounwind {
     ; The 'qnan' constant 0x7FF8000000000000 tested against mask 512 is expected to fold to false.
 710 ; CHECK-LABEL: @test_constant_class_qnan_test_pinf_f64(
 711 ; CHECK-NEXT:    ret i1 false
 712 ;
 713   %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 512)
 714   ret i1 %val
 715 }
 716
 717 define i1 @test_constant_class_snan_test_pinf_f64() nounwind {
     ; The 'snan' constant 0x7FF0000000000001 tested against mask 512 is expected to fold to false.
 718 ; CHECK-LABEL: @test_constant_class_snan_test_pinf_f64(
 719 ; CHECK-NEXT:    ret i1 false
 720 ;
 721   %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 512)
 722   ret i1 %val
 723 }
 724
 725 define i1 @test_class_is_snan_nnan_src(float %x) {
     ; The operand comes from an nnan fadd; class with mask 1 is expected to fold to false, leaving no fadd behind.
 726 ; CHECK-LABEL: @test_class_is_snan_nnan_src(
 727 ; CHECK-NEXT:    ret i1 false
 728 ;
 729   %nnan = fadd nnan float %x, 1.0
 730   %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 1)
 731   ret i1 %class
 732 }
 733
 734 define i1 @test_class_is_qnan_nnan_src(float %x) {
     ; The operand comes from an nnan fadd; class with mask 2 is expected to fold to false, leaving no fadd behind.
 735 ; CHECK-LABEL: @test_class_is_qnan_nnan_src(
 736 ; CHECK-NEXT:    ret i1 false
 737 ;
 738   %nnan = fadd nnan float %x, 1.0
 739   %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 2)
 740   ret i1 %class
 741 }
 742
 743 define i1 @test_class_is_nan_nnan_src(float %x) {
     ; The operand comes from an nnan fadd; class with mask 3 is expected to fold to false, leaving no fadd behind.
 744 ; CHECK-LABEL: @test_class_is_nan_nnan_src(
 745 ; CHECK-NEXT:    ret i1 false
 746 ;
 747   %nnan = fadd nnan float %x, 1.0
 748   %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 3)
 749   ret i1 %class
 750 }
 751
 752 define i1 @test_class_is_nan_other_nnan_src(float %x) {
     ; With an nnan fadd operand, mask 267 is expected to be reduced to 264 (267 minus the low bits 3); the fadd and the call stay.
 753 ; CHECK-LABEL: @test_class_is_nan_other_nnan_src(
 754 ; CHECK-NEXT:    [[NNAN:%.*]] = fadd nnan float [[X:%.*]], 1.000000e+00
 755 ; CHECK-NEXT:    [[CLASS:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[NNAN]], i32 264)
 756 ; CHECK-NEXT:    ret i1 [[CLASS]]
 757 ;
 758   %nnan = fadd nnan float %x, 1.0
 759   %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 267)
 760   ret i1 %class
 761 }
 762
 763 ; --------------------------------------------------------------------
 764 ; llvm.amdgcn.cos
 765 ; --------------------------------------------------------------------
 766 declare float @llvm.amdgcn.cos.f32(float) nounwind readnone
 767 declare float @llvm.fabs.f32(float) nounwind readnone
 768
 769 define float @cos_fneg_f32(float %x) {
     ; The operand is negated via "fsub -0.0, %x"; cos is expected to end up taking %x directly.
 770 ; CHECK-LABEL: @cos_fneg_f32(
 771 ; CHECK-NEXT:    [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
 772 ; CHECK-NEXT:    ret float [[COS]]
 773 ;
 774   %x.fneg = fsub float -0.0, %x
 775   %cos = call float @llvm.amdgcn.cos.f32(float %x.fneg)
 776   ret float %cos
 777 }
 778
 779 define float @cos_unary_fneg_f32(float %x) {
     ; The operand is negated via the unary fneg instruction; cos is expected to end up taking %x directly.
 780 ; CHECK-LABEL: @cos_unary_fneg_f32(
 781 ; CHECK-NEXT:    [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
 782 ; CHECK-NEXT:    ret float [[COS]]
 783 ;
 784   %x.fneg = fneg float %x
 785   %cos = call float @llvm.amdgcn.cos.f32(float %x.fneg)
 786   ret float %cos
 787 }
 788
 789 define float @cos_fabs_f32(float %x) {
     ; The operand is passed through llvm.fabs; cos is expected to end up taking %x directly.
 790 ; CHECK-LABEL: @cos_fabs_f32(
 791 ; CHECK-NEXT:    [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
 792 ; CHECK-NEXT:    ret float [[COS]]
 793 ;
 794   %x.fabs = call float @llvm.fabs.f32(float %x)
 795   %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs)
 796   ret float %cos
 797 }
 798
 799 define float @cos_fabs_fneg_f32(float %x) {
     ; The operand is llvm.fabs followed by "fsub -0.0, ..."; cos is expected to end up taking %x directly.
 800 ; CHECK-LABEL: @cos_fabs_fneg_f32(
 801 ; CHECK-NEXT:    [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
 802 ; CHECK-NEXT:    ret float [[COS]]
 803 ;
 804   %x.fabs = call float @llvm.fabs.f32(float %x)
 805   %x.fabs.fneg = fsub float -0.0, %x.fabs
 806   %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs.fneg)
 807   ret float %cos
 808 }
 809
 810 define float @cos_fabs_unary_fneg_f32(float %x) {
     ; The operand is llvm.fabs followed by unary fneg; cos is expected to end up taking %x directly.
 811 ; CHECK-LABEL: @cos_fabs_unary_fneg_f32(
 812 ; CHECK-NEXT:    [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
 813 ; CHECK-NEXT:    ret float [[COS]]
 814 ;
 815   %x.fabs = call float @llvm.fabs.f32(float %x)
 816   %x.fabs.fneg = fneg float %x.fabs
 817   %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs.fneg)
 818   ret float %cos
 819 }
 820
 821 ; --------------------------------------------------------------------
 822 ; llvm.amdgcn.cvt.pkrtz
 823 ; --------------------------------------------------------------------
 824
 825 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone
 826
 827 define <2 x half> @vars_lhs_cvt_pkrtz(float %x, float %y) {
     ; With both operands variable, the cvt.pkrtz call is expected to be left unchanged.
 828 ; CHECK-LABEL: @vars_lhs_cvt_pkrtz(
 829 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[X:%.*]], float [[Y:%.*]])
 830 ; CHECK-NEXT:    ret <2 x half> [[CVT]]
 831 ;
 832   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
 833   ret <2 x half> %cvt
 834 }
 835
 836 define <2 x half> @constant_lhs_cvt_pkrtz(float %y) {
     ; With a constant 0.0 first operand and a variable second operand, the call is expected to be left unchanged.
 837 ; CHECK-LABEL: @constant_lhs_cvt_pkrtz(
 838 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.000000e+00, float [[Y:%.*]])
 839 ; CHECK-NEXT:    ret <2 x half> [[CVT]]
 840 ;
 841   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.0, float %y)
 842   ret <2 x half> %cvt
 843 }
 844
 845 define <2 x half> @constant_rhs_cvt_pkrtz(float %x) {
     ; With a variable first operand and a constant 0.0 second operand, the call is expected to be left unchanged.
 846 ; CHECK-LABEL: @constant_rhs_cvt_pkrtz(
 847 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[X:%.*]], float 0.000000e+00)
 848 ; CHECK-NEXT:    ret <2 x half> [[CVT]]
 849 ;
 850   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float 0.0)
 851   ret <2 x half> %cvt
 852 }
 853
 854 define <2 x half> @undef_lhs_cvt_pkrtz(float %y) {
     ; With only the first operand undef, the cvt.pkrtz call is expected to be left unchanged.
 855 ; CHECK-LABEL: @undef_lhs_cvt_pkrtz(
 856 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float [[Y:%.*]])
 857 ; CHECK-NEXT:    ret <2 x half> [[CVT]]
 858 ;
 859   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %y)
 860   ret <2 x half> %cvt
 861 }
 862
 863 define <2 x half> @undef_rhs_cvt_pkrtz(float %x) {
     ; With only the second operand undef, the cvt.pkrtz call is expected to be left unchanged.
 864 ; CHECK-LABEL: @undef_rhs_cvt_pkrtz(
 865 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[X:%.*]], float undef)
 866 ; CHECK-NEXT:    ret <2 x half> [[CVT]]
 867 ;
 868   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float undef)
 869   ret <2 x half> %cvt
 870 }
 871
 872 define <2 x half> @undef_cvt_pkrtz() {
     ; With both operands undef, cvt.pkrtz is expected to fold to an undef vector.
 873 ; CHECK-LABEL: @undef_cvt_pkrtz(
 874 ; CHECK-NEXT:    ret <2 x half> undef
 875 ;
 876   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float undef)
 877   ret <2 x half> %cvt
 878 }
 879
 880 define <2 x half> @constant_splat0_cvt_pkrtz() {
     ; With both operands the constant 0.0, cvt.pkrtz is expected to fold to zeroinitializer.
 881 ; CHECK-LABEL: @constant_splat0_cvt_pkrtz(
 882 ; CHECK-NEXT:    ret <2 x half> zeroinitializer
 883 ;
 884   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.0, float 0.0)
 885   ret <2 x half> %cvt
 886 }
 887
 888 define <2 x half> @constant_cvt_pkrtz() {
       ; Constant operands 2.0 and 4.0: expected to fold to the half constants 0xH4000 and 0xH4400.
 889 ; CHECK-LABEL: @constant_cvt_pkrtz(
 890 ; CHECK-NEXT:    ret <2 x half> <half 0xH4000, half 0xH4400>
 891 ;
 892   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 2.0, float 4.0)
 893   ret <2 x half> %cvt
 894 }
 895
 896 ; Test constant values where rtz changes result
 897 define <2 x half> @constant_rtz_pkrtz() {
       ; 65535.0 in both lanes is expected to fold to 0xH7BFF in both lanes
       ; (presumably the largest finite half, i.e. rounded toward zero rather than to infinity).
 898 ; CHECK-LABEL: @constant_rtz_pkrtz(
 899 ; CHECK-NEXT:    ret <2 x half> <half 0xH7BFF, half 0xH7BFF>
 900 ;
 901   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 65535.0, float 65535.0)
 902   ret <2 x half> %cvt
 903 }
 904
 905 ; --------------------------------------------------------------------
 906 ; llvm.amdgcn.cvt.pknorm.i16
 907 ; --------------------------------------------------------------------
 908
 909 declare <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float, float) nounwind readnone
 910
 911 define <2 x i16> @undef_lhs_cvt_pknorm_i16(float %y) {
       ; Only the first operand is undef: the call is expected to stay.
 912 ; CHECK-LABEL: @undef_lhs_cvt_pknorm_i16(
 913 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float undef, float [[Y:%.*]])
 914 ; CHECK-NEXT:    ret <2 x i16> [[CVT]]
 915 ;
 916   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float undef, float %y)
 917   ret <2 x i16> %cvt
 918 }
 919
 920 define <2 x i16> @undef_rhs_cvt_pknorm_i16(float %x) {
       ; Only the second operand is undef: the call is expected to stay.
 921 ; CHECK-LABEL: @undef_rhs_cvt_pknorm_i16(
 922 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float [[X:%.*]], float undef)
 923 ; CHECK-NEXT:    ret <2 x i16> [[CVT]]
 924 ;
 925   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %x, float undef)
 926   ret <2 x i16> %cvt
 927 }
 928
 929 define <2 x i16> @undef_cvt_pknorm_i16() {
       ; Both operands undef: expected to fold to an undef vector.
 930 ; CHECK-LABEL: @undef_cvt_pknorm_i16(
 931 ; CHECK-NEXT:    ret <2 x i16> undef
 932 ;
 933   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float undef, float undef)
 934   ret <2 x i16> %cvt
 935 }
 936
 937 ; --------------------------------------------------------------------
 938 ; llvm.amdgcn.cvt.pknorm.u16
 939 ; --------------------------------------------------------------------
 940
 941 declare <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float, float) nounwind readnone
 942
 943 define <2 x i16> @undef_lhs_cvt_pknorm_u16(float %y) {
       ; Only the first operand is undef: the call is expected to stay.
 944 ; CHECK-LABEL: @undef_lhs_cvt_pknorm_u16(
 945 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float undef, float [[Y:%.*]])
 946 ; CHECK-NEXT:    ret <2 x i16> [[CVT]]
 947 ;
 948   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float undef, float %y)
 949   ret <2 x i16> %cvt
 950 }
 951
 952 define <2 x i16> @undef_rhs_cvt_pknorm_u16(float %x) {
       ; Only the second operand is undef: the call is expected to stay.
 953 ; CHECK-LABEL: @undef_rhs_cvt_pknorm_u16(
 954 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float [[X:%.*]], float undef)
 955 ; CHECK-NEXT:    ret <2 x i16> [[CVT]]
 956 ;
 957   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %x, float undef)
 958   ret <2 x i16> %cvt
 959 }
 960
 961 define <2 x i16> @undef_cvt_pknorm_u16() {
       ; Both operands undef: expected to fold to an undef vector.
 962 ; CHECK-LABEL: @undef_cvt_pknorm_u16(
 963 ; CHECK-NEXT:    ret <2 x i16> undef
 964 ;
 965   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float undef, float undef)
 966   ret <2 x i16> %cvt
 967 }
 968
 969 ; --------------------------------------------------------------------
 970 ; llvm.amdgcn.cvt.pk.i16
 971 ; --------------------------------------------------------------------
 972
 973 declare <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32, i32) nounwind readnone
 974
 975 define <2 x i16> @undef_lhs_cvt_pk_i16(i32 %y) {
       ; Only the first operand is undef: the call is expected to stay.
 976 ; CHECK-LABEL: @undef_lhs_cvt_pk_i16(
 977 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 [[Y:%.*]])
 978 ; CHECK-NEXT:    ret <2 x i16> [[CVT]]
 979 ;
 980   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 %y)
 981   ret <2 x i16> %cvt
 982 }
 983
 984 define <2 x i16> @undef_rhs_cvt_pk_i16(i32 %x) {
       ; Only the second operand is undef: the call is expected to stay.
 985 ; CHECK-LABEL: @undef_rhs_cvt_pk_i16(
 986 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 [[X:%.*]], i32 undef)
 987 ; CHECK-NEXT:    ret <2 x i16> [[CVT]]
 988 ;
 989   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %x, i32 undef)
 990   ret <2 x i16> %cvt
 991 }
 992
 993 define <2 x i16> @undef_cvt_pk_i16() {
       ; Both operands undef: expected to fold to an undef vector.
 994 ; CHECK-LABEL: @undef_cvt_pk_i16(
 995 ; CHECK-NEXT:    ret <2 x i16> undef
 996 ;
 997   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 undef)
 998   ret <2 x i16> %cvt
 999 }
1000
1001 ; --------------------------------------------------------------------
1002 ; llvm.amdgcn.cvt.pk.u16
1003 ; --------------------------------------------------------------------
1004
1005 declare <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32, i32) nounwind readnone
1006
1007 define <2 x i16> @undef_lhs_cvt_pk_u16(i32 %y) {
       ; Only the first operand is undef: the call is expected to stay.
1008 ; CHECK-LABEL: @undef_lhs_cvt_pk_u16(
1009 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 undef, i32 [[Y:%.*]])
1010 ; CHECK-NEXT:    ret <2 x i16> [[CVT]]
1011 ;
1012   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 undef, i32 %y)
1013   ret <2 x i16> %cvt
1014 }
1015
1016 define <2 x i16> @undef_rhs_cvt_pk_u16(i32 %x) {
       ; Only the second operand is undef: the call is expected to stay.
1017 ; CHECK-LABEL: @undef_rhs_cvt_pk_u16(
1018 ; CHECK-NEXT:    [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 [[X:%.*]], i32 undef)
1019 ; CHECK-NEXT:    ret <2 x i16> [[CVT]]
1020 ;
1021   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %x, i32 undef)
1022   ret <2 x i16> %cvt
1023 }
1024
1025 define <2 x i16> @undef_cvt_pk_u16() {
       ; Both operands undef: expected to fold to an undef vector.
1026 ; CHECK-LABEL: @undef_cvt_pk_u16(
1027 ; CHECK-NEXT:    ret <2 x i16> undef
1028 ;
1029   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 undef, i32 undef)
1030   ret <2 x i16> %cvt
1031 }
1032
1033 ; --------------------------------------------------------------------
1034 ; llvm.amdgcn.ubfe
1035 ; --------------------------------------------------------------------
1036
1037 declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) nounwind readnone
1038 declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) nounwind readnone
1039
1040 define i32 @ubfe_var_i32(i32 %src, i32 %offset, i32 %width) {
       ; All three operands are variable: the call is expected to stay unchanged.
1041 ; CHECK-LABEL: @ubfe_var_i32(
1042 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 [[WIDTH:%.*]])
1043 ; CHECK-NEXT:    ret i32 [[BFE]]
1044 ;
1045   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 %width)
1046   ret i32 %bfe
1047 }
1048
1049 define i32 @ubfe_clear_high_bits_constant_offset_i32(i32 %src, i32 %width) {
       ; Constant offset 133 is expected to be rewritten to 5 (133 & 31).
1050 ; CHECK-LABEL: @ubfe_clear_high_bits_constant_offset_i32(
1051 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 5, i32 [[WIDTH:%.*]])
1052 ; CHECK-NEXT:    ret i32 [[BFE]]
1053 ;
1054   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 133, i32 %width)
1055   ret i32 %bfe
1056 }
1057
1058 define i32 @ubfe_clear_high_bits_constant_width_i32(i32 %src, i32 %offset) {
       ; Constant width 133 is expected to be rewritten to 5 (133 & 31).
1059 ; CHECK-LABEL: @ubfe_clear_high_bits_constant_width_i32(
1060 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 5)
1061 ; CHECK-NEXT:    ret i32 [[BFE]]
1062 ;
1063   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 133)
1064   ret i32 %bfe
1065 }
1066
1067 define i32 @ubfe_width_0(i32 %src, i32 %offset) {
       ; Constant width 0 with variable source and offset: expected to fold to 0.
1068 ; CHECK-LABEL: @ubfe_width_0(
1069 ; CHECK-NEXT:    ret i32 0
1070 ;
1071   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 0)
1072   ret i32 %bfe
1073 }
1074
1075 define i32 @ubfe_width_31(i32 %src, i32 %offset) {
       ; Constant width 31 with a variable offset: the call is expected to stay unchanged.
1076 ; CHECK-LABEL: @ubfe_width_31(
1077 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 31)
1078 ; CHECK-NEXT:    ret i32 [[BFE]]
1079 ;
1080   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 31)
1081   ret i32 %bfe
1082 }
1083
1084 define i32 @ubfe_width_32(i32 %src, i32 %offset) {
       ; Constant width 32 (32 & 31 == 0): expected to fold to 0, like a width of 0.
1085 ; CHECK-LABEL: @ubfe_width_32(
1086 ; CHECK-NEXT:    ret i32 0
1087 ;
1088   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 32)
1089   ret i32 %bfe
1090 }
1091
1092 define i32 @ubfe_width_33(i32 %src, i32 %offset) {
       ; Constant width 33 is expected to be rewritten to 1 (33 & 31).
1093 ; CHECK-LABEL: @ubfe_width_33(
1094 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 1)
1095 ; CHECK-NEXT:    ret i32 [[BFE]]
1096 ;
1097   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 33)
1098   ret i32 %bfe
1099 }
1100
1101 define i32 @ubfe_offset_33(i32 %src, i32 %width) {
       ; Constant offset 33 is expected to be rewritten to 1 (33 & 31).
1102 ; CHECK-LABEL: @ubfe_offset_33(
1103 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 1, i32 [[WIDTH:%.*]])
1104 ; CHECK-NEXT:    ret i32 [[BFE]]
1105 ;
1106   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 33, i32 %width)
1107   ret i32 %bfe
1108 }
1109
1110 define i32 @ubfe_offset_0(i32 %src, i32 %width) {
       ; Constant offset 0 with a variable width: the call is expected to stay unchanged.
1111 ; CHECK-LABEL: @ubfe_offset_0(
1112 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 0, i32 [[WIDTH:%.*]])
1113 ; CHECK-NEXT:    ret i32 [[BFE]]
1114 ;
1115   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 %width)
1116   ret i32 %bfe
1117 }
1118
1119 define i32 @ubfe_offset_32(i32 %src, i32 %width) {
       ; Constant offset 32 is expected to be rewritten to 0 (32 & 31).
1120 ; CHECK-LABEL: @ubfe_offset_32(
1121 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 0, i32 [[WIDTH:%.*]])
1122 ; CHECK-NEXT:    ret i32 [[BFE]]
1123 ;
1124   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 32, i32 %width)
1125   ret i32 %bfe
1126 }
1127
1128 define i32 @ubfe_offset_31(i32 %src, i32 %width) {
       ; Constant offset 31 with a variable width: the call is expected to stay unchanged.
1129 ; CHECK-LABEL: @ubfe_offset_31(
1130 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 31, i32 [[WIDTH:%.*]])
1131 ; CHECK-NEXT:    ret i32 [[BFE]]
1132 ;
1133   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 31, i32 %width)
1134   ret i32 %bfe
1135 }
1136
1137 define i32 @ubfe_offset_0_width_0(i32 %src) {
       ; Offset 0 and width 0 on a variable source: expected to fold to 0.
1138 ; CHECK-LABEL: @ubfe_offset_0_width_0(
1139 ; CHECK-NEXT:    ret i32 0
1140 ;
1141   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 0)
1142   ret i32 %bfe
1143 }
1144
1145 define i32 @ubfe_offset_0_width_3(i32 %src) {
       ; Offset 0, width 3: expected to become a plain `and` with 7 (the low three bits).
1146 ; CHECK-LABEL: @ubfe_offset_0_width_3(
1147 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[SRC:%.*]], 7
1148 ; CHECK-NEXT:    ret i32 [[TMP1]]
1149 ;
1150   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 3)
1151   ret i32 %bfe
1152 }
1153
1154 define i32 @ubfe_offset_3_width_1(i32 %src) {
       ; Offset 3, width 1: expected to become `lshr` by 3 followed by `and` with 1.
1155 ; CHECK-LABEL: @ubfe_offset_3_width_1(
1156 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[SRC:%.*]], 3
1157 ; CHECK-NEXT:    [[BFE:%.*]] = and i32 [[TMP1]], 1
1158 ; CHECK-NEXT:    ret i32 [[BFE]]
1159 ;
1160   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 3, i32 1)
1161   ret i32 %bfe
1162 }
1163
1164 define i32 @ubfe_offset_3_width_4(i32 %src) {
       ; Offset 3, width 4: expected to become `lshr` by 3 followed by `and` with 15.
1165 ; CHECK-LABEL: @ubfe_offset_3_width_4(
1166 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[SRC:%.*]], 3
1167 ; CHECK-NEXT:    [[BFE:%.*]] = and i32 [[TMP1]], 15
1168 ; CHECK-NEXT:    ret i32 [[BFE]]
1169 ;
1170   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 3, i32 4)
1171   ret i32 %bfe
1172 }
1173
1174 define i32 @ubfe_0_0_0() {
       ; All three operands are the constant 0: expected to fold to 0.
1175 ; CHECK-LABEL: @ubfe_0_0_0(
1176 ; CHECK-NEXT:    ret i32 0
1177 ;
1178   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
1179   ret i32 %bfe
1180 }
1181
1182 define i32 @ubfe_neg1_5_7() {
       ; Source -1, offset 5, width 7: expected to fold to 127 (seven one bits, zero-extended).
1183 ; CHECK-LABEL: @ubfe_neg1_5_7(
1184 ; CHECK-NEXT:    ret i32 127
1185 ;
1186   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 -1, i32 5, i32 7)
1187   ret i32 %bfe
1188 }
1189
1190 define i32 @ubfe_undef_src_i32(i32 %offset, i32 %width) {
       ; Undef source with variable offset and width: expected to fold to undef.
1191 ; CHECK-LABEL: @ubfe_undef_src_i32(
1192 ; CHECK-NEXT:    ret i32 undef
1193 ;
1194   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 undef, i32 %offset, i32 %width)
1195   ret i32 %bfe
1196 }
1197
1198 define i32 @ubfe_undef_offset_i32(i32 %src, i32 %width) {
       ; Undef offset only: the call is expected to stay, unlike the undef-source case.
1199 ; CHECK-LABEL: @ubfe_undef_offset_i32(
1200 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 undef, i32 [[WIDTH:%.*]])
1201 ; CHECK-NEXT:    ret i32 [[BFE]]
1202 ;
1203   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 undef, i32 %width)
1204   ret i32 %bfe
1205 }
1206
1207 define i32 @ubfe_undef_width_i32(i32 %src, i32 %offset) {
       ; Undef width only: the call is expected to stay, unlike the undef-source case.
1208 ; CHECK-LABEL: @ubfe_undef_width_i32(
1209 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 undef)
1210 ; CHECK-NEXT:    ret i32 [[BFE]]
1211 ;
1212   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 undef)
1213   ret i32 %bfe
1214 }
1215
1216 define i64 @ubfe_offset_33_width_4_i64(i64 %src) {
       ; i64 variant: offset 33 is used as is (shift by 33, not by 1), then masked with 15.
1217 ; CHECK-LABEL: @ubfe_offset_33_width_4_i64(
1218 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[SRC:%.*]], 33
1219 ; CHECK-NEXT:    [[BFE:%.*]] = and i64 [[TMP1]], 15
1220 ; CHECK-NEXT:    ret i64 [[BFE]]
1221 ;
1222   %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 33, i32 4)
1223   ret i64 %bfe
1224 }
1225
1226 define i64 @ubfe_offset_0_i64(i64 %src, i32 %width) {
       ; i64 variant with offset 0 and a variable width: the call is expected to stay unchanged.
1227 ; CHECK-LABEL: @ubfe_offset_0_i64(
1228 ; CHECK-NEXT:    [[BFE:%.*]] = call i64 @llvm.amdgcn.ubfe.i64(i64 [[SRC:%.*]], i32 0, i32 [[WIDTH:%.*]])
1229 ; CHECK-NEXT:    ret i64 [[BFE]]
1230 ;
1231   %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 0, i32 %width)
1232   ret i64 %bfe
1233 }
1234
1235 define i64 @ubfe_offset_32_width_32_i64(i64 %src) {
       ; i64 variant extracting the upper 32 bits: expected to become a single `lshr` by 32.
1236 ; CHECK-LABEL: @ubfe_offset_32_width_32_i64(
1237 ; CHECK-NEXT:    [[BFE:%.*]] = lshr i64 [[SRC:%.*]], 32
1238 ; CHECK-NEXT:    ret i64 [[BFE]]
1239 ;
1240   %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 32, i32 32)
1241   ret i64 %bfe
1242 }
1243
1244 ; --------------------------------------------------------------------
1245 ; llvm.amdgcn.sbfe
1246 ; --------------------------------------------------------------------
1247
1248 declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) nounwind readnone
1249 declare i64 @llvm.amdgcn.sbfe.i64(i64, i32, i32) nounwind readnone
1250
1251 define i32 @sbfe_offset_31(i32 %src, i32 %width) {
       ; Signed extract with constant offset 31 and a variable width: the call is expected to stay.
1252 ; CHECK-LABEL: @sbfe_offset_31(
1253 ; CHECK-NEXT:    [[BFE:%.*]] = call i32 @llvm.amdgcn.sbfe.i32(i32 [[SRC:%.*]], i32 31, i32 [[WIDTH:%.*]])
1254 ; CHECK-NEXT:    ret i32 [[BFE]]
1255 ;
1256   %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 31, i32 %width)
1257   ret i32 %bfe
1258 }
1259
1260 define i32 @sbfe_neg1_5_7() {
       ; Source -1, offset 5, width 7: the signed extract is expected to fold to -1
       ; (the unsigned counterpart with the same operands folds to 127).
1261 ; CHECK-LABEL: @sbfe_neg1_5_7(
1262 ; CHECK-NEXT:    ret i32 -1
1263 ;
1264   %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 -1, i32 5, i32 7)
1265   ret i32 %bfe
1266 }
1267
1268 define i64 @sbfe_offset_32_width_32_i64(i64 %src) {
       ; i64 signed extract of the upper 32 bits: expected to become a single `ashr` by 32.
1269 ; CHECK-LABEL: @sbfe_offset_32_width_32_i64(
1270 ; CHECK-NEXT:    [[BFE:%.*]] = ashr i64 [[SRC:%.*]], 32
1271 ; CHECK-NEXT:    ret i64 [[BFE]]
1272 ;
1273   %bfe = call i64 @llvm.amdgcn.sbfe.i64(i64 %src, i32 32, i32 32)
1274   ret i64 %bfe
1275 }
1276
1277 ; --------------------------------------------------------------------
1278 ; llvm.amdgcn.exp
1279 ; --------------------------------------------------------------------
1280
1281 declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) nounwind inaccessiblememonly
1282
1283
1284
1285
1286 define void @exp_disabled_inputs_to_undef(float %x, float %y, float %z, float %w) {
       ; The second argument is a 4-bit enable mask over the four float sources
       ; (bit 0 = first source ... bit 3 = fourth source). Every source whose bit is
       ; clear is expected to be replaced by undef, for constants and variables alike;
       ; all other arguments are expected to be preserved.
1287   ; enable src0..src3 constants
1288 ; CHECK-LABEL: @exp_disabled_inputs_to_undef(
1289 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.000000e+00, float undef, float undef, float undef, i1 true, i1 false)
1290 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float undef, float 2.000000e+00, float undef, float undef, i1 true, i1 false)
1291 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float undef, float undef, float 5.000000e-01, float undef, i1 true, i1 false)
1292 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float undef, float undef, float undef, float 4.000000e+00, i1 true, i1 false)
1293 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float [[X:%.*]], float undef, float undef, float undef, i1 true, i1 false)
1294 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float undef, float [[Y:%.*]], float undef, float undef, i1 true, i1 false)
1295 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float undef, float undef, float [[Z:%.*]], float undef, i1 true, i1 false)
1296 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float undef, float undef, float undef, float [[W:%.*]], i1 true, i1 false)
1297 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float undef, float undef, float undef, float undef, i1 true, i1 false)
1298 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.000000e+00, float 2.000000e+00, float undef, float undef, i1 true, i1 false)
1299 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.000000e+00, float undef, float 5.000000e-01, float undef, i1 true, i1 false)
1300 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.000000e+00, float undef, float undef, float 4.000000e+00, i1 false, i1 false)
1301 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 false, i1 false)
1302 ; CHECK-NEXT:    ret void
1303 ;
1304   call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
1305   call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
1306   call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
1307   call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
1308
1309   ; enable src0..src3 variables
1310   call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float %x, float %y, float %z, float %w, i1 true, i1 false)
1311   call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %x, float %y, float %z, float %w, i1 true, i1 false)
1312   call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float %x, float %y, float %z, float %w, i1 true, i1 false)
1313   call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float %x, float %y, float %z, float %w, i1 true, i1 false)
1314
1315   ; enable none
1316   call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float %x, float %y, float %z, float %w, i1 true, i1 false)
1317
1318   ; enable different source combinations
1319   call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
1320   call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
1321   call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
1322   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
1323
1324   ret void
1325 }
1326
1327 ; --------------------------------------------------------------------
1328 ; llvm.amdgcn.exp.compr
1329 ; --------------------------------------------------------------------
1330
1331 declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) nounwind inaccessiblememonly
1332
1333
1334
1335 define void @exp_compr_disabled_inputs_to_undef(<2 x half> %xy, <2 x half> %zw) {
       ; Compressed export: the second argument is the enable mask. Per the expected
       ; output below, masks 1, 2 and 3 keep only the first vector, mask 12 keeps only
       ; the second, mask 15 keeps both, and mask 0 turns both into undef.
       ; Masks that set just one of bits 2/3 (4 or 8) are not exercised here.
1336 ; CHECK-LABEL: @exp_compr_disabled_inputs_to_undef(
1337 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
1338 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
1339 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
1340 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
1341 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
1342 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> [[XY:%.*]], <2 x half> undef, i1 true, i1 false)
1343 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> [[XY]], <2 x half> undef, i1 true, i1 false)
1344 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> [[XY]], <2 x half> undef, i1 true, i1 false)
1345 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 12, <2 x half> undef, <2 x half> [[ZW:%.*]], i1 true, i1 false)
1346 ; CHECK-NEXT:    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> [[XY]], <2 x half> [[ZW]], i1 true, i1 false)
1347 ; CHECK-NEXT:    ret void
1348 ;
1349   call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
1350   call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
1351   call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
1352   call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
1353
1354   call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
1355   call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
1356   call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
1357   call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
1358
1359   call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 12, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
1360   call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
1361   ret void
1362 }
1363
1364 ; --------------------------------------------------------------------
1365 ; llvm.amdgcn.fmed3
1366 ; --------------------------------------------------------------------
1367
1368 declare float @llvm.amdgcn.fmed3.f32(float, float, float) nounwind readnone
1369
1370 define float @fmed3_f32(float %x, float %y, float %z) {
       ; All three operands are variable: the call is expected to stay unchanged.
1371 ; CHECK-LABEL: @fmed3_f32(
1372 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]])
1373 ; CHECK-NEXT:    ret float [[MED3]]
1374 ;
1375   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z)
1376   ret float %med3
1377 }
1378
1379 define float @fmed3_canonicalize_x_c0_c1_f32(float %x) {
       ; Operands already ordered (variable, 0.0, 1.0): expected to stay in that order.
1380 ; CHECK-LABEL: @fmed3_canonicalize_x_c0_c1_f32(
1381 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float 0.000000e+00, float 1.000000e+00)
1382 ; CHECK-NEXT:    ret float [[MED3]]
1383 ;
1384   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0.0, float 1.0)
1385   ret float %med3
1386 }
1387
1388 define float @fmed3_canonicalize_c0_x_c1_f32(float %x) {
       ; Input order (0.0, %x, 1.0): the variable is expected to move in front of both constants.
1389 ; CHECK-LABEL: @fmed3_canonicalize_c0_x_c1_f32(
1390 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float 0.000000e+00, float 1.000000e+00)
1391 ; CHECK-NEXT:    ret float [[MED3]]
1392 ;
1393   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %x, float 1.0)
1394   ret float %med3
1395 }
1396
1397 define float @fmed3_canonicalize_c0_c1_x_f32(float %x) {
       ; Input order (0.0, 1.0, %x): the variable is expected to move in front of both constants.
1398 ; CHECK-LABEL: @fmed3_canonicalize_c0_c1_x_f32(
1399 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float 0.000000e+00, float 1.000000e+00)
1400 ; CHECK-NEXT:    ret float [[MED3]]
1401 ;
1402   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %x)
1403   ret float %med3
1404 }
1405
1406 define float @fmed3_canonicalize_x_y_c_f32(float %x, float %y) {
       ; Two variables followed by the constant 1.0: expected to stay in that order.
1407 ; CHECK-LABEL: @fmed3_canonicalize_x_y_c_f32(
1408 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float [[Y:%.*]], float 1.000000e+00)
1409 ; CHECK-NEXT:    ret float [[MED3]]
1410 ;
1411   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.0)
1412   ret float %med3
1413 }
1414
1415 define float @fmed3_canonicalize_x_c_y_f32(float %x, float %y) {
       ; Constant 1.0 in the middle: expected to be moved to the last operand position.
1416 ; CHECK-LABEL: @fmed3_canonicalize_x_c_y_f32(
1417 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float [[Y:%.*]], float 1.000000e+00)
1418 ; CHECK-NEXT:    ret float [[MED3]]
1419 ;
1420   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 1.0, float %y)
1421   ret float %med3
1422 }
1423
1424 define float @fmed3_canonicalize_c_x_y_f32(float %x, float %y) {
       ; Constant 1.0 first: expected to be moved to the last operand position.
1425 ; CHECK-LABEL: @fmed3_canonicalize_c_x_y_f32(
1426 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float [[Y:%.*]], float 1.000000e+00)
1427 ; CHECK-NEXT:    ret float [[MED3]]
1428 ;
1429   %med3 = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %x, float %y)
1430   ret float %med3
1431 }
1432
1433 define float @fmed3_undef_x_y_f32(float %x, float %y) {
       ; Undef first operand: expected to become llvm.minnum of the other two operands.
1434 ; CHECK-LABEL: @fmed3_undef_x_y_f32(
1435 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
1436 ; CHECK-NEXT:    ret float [[MED3]]
1437 ;
1438   %med3 = call float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y)
1439   ret float %med3
1440 }
1441
1442 define float @fmed3_fmf_undef_x_y_f32(float %x, float %y) {
       ; Same as the undef-first-operand case, but with `nnan` on the call: the flag is
       ; expected to be carried over to the replacement llvm.minnum call.
1443 ; CHECK-LABEL: @fmed3_fmf_undef_x_y_f32(
1444 ; CHECK-NEXT:    [[MED3:%.*]] = call nnan float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
1445 ; CHECK-NEXT:    ret float [[MED3]]
1446 ;
1447   %med3 = call nnan float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y)
1448   ret float %med3
1449 }
1450
1451 define float @fmed3_x_undef_y_f32(float %x, float %y) {
       ; Undef second operand: expected to become llvm.minnum of the other two operands.
1452 ; CHECK-LABEL: @fmed3_x_undef_y_f32(
1453 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
1454 ; CHECK-NEXT:    ret float [[MED3]]
1455 ;
1456   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float undef, float %y)
1457   ret float %med3
1458 }
1459
1460 define float @fmed3_x_y_undef_f32(float %x, float %y) {
       ; Undef third operand: expected to become llvm.maxnum (not minnum) of the first two.
1461 ; CHECK-LABEL: @fmed3_x_y_undef_f32(
1462 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
1463 ; CHECK-NEXT:    ret float [[MED3]]
1464 ;
1465   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef)
1466   ret float %med3
1467 }
1468
1469 define float @fmed3_qnan0_x_y_f32(float %x, float %y) {
       ; Constant 0x7FF8000000000000 (NaN) first operand: expected to become llvm.minnum of the other two.
1470 ; CHECK-LABEL: @fmed3_qnan0_x_y_f32(
1471 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
1472 ; CHECK-NEXT:    ret float [[MED3]]
1473 ;
1474   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000000000000, float %x, float %y)
1475   ret float %med3
1476 }
1477
1478 define float @fmed3_x_qnan0_y_f32(float %x, float %y) {
       ; Constant 0x7FF8000000000000 (NaN) second operand: expected to become llvm.minnum of the other two.
1479 ; CHECK-LABEL: @fmed3_x_qnan0_y_f32(
1480 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
1481 ; CHECK-NEXT:    ret float [[MED3]]
1482 ;
1483   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8000000000000, float %y)
1484   ret float %med3
1485 }
1486
1487 define float @fmed3_x_y_qnan0_f32(float %x, float %y) {
       ; Constant 0x7FF8000000000000 (NaN) third operand: expected to become llvm.maxnum of the first two.
1488 ; CHECK-LABEL: @fmed3_x_y_qnan0_f32(
1489 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
1490 ; CHECK-NEXT:    ret float [[MED3]]
1491 ;
1492   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000)
1493   ret float %med3
1494 }
1495
1496 define float @fmed3_qnan1_x_y_f32(float %x, float %y) {
       ; A NaN with a different payload (0x7FF8000100000000) as first operand gives
       ; the same expected result: llvm.minnum of the other two operands.
1497 ; CHECK-LABEL: @fmed3_qnan1_x_y_f32(
1498 ; CHECK-NEXT:    [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
1499 ; CHECK-NEXT:    ret float [[MED3]]
1500 ;
1501   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float %x, float %y)
1502   ret float %med3
1503 }
1504
1505 ; This can return any of the qnans.
1506 define float @fmed3_qnan0_qnan1_qnan2_f32(float %x, float %y) {
       ; Three NaN constants with distinct payloads: the current expected fold returns the
       ; third operand's value (0x7FF8030000000000). The parameters %x and %y are unused.
1507 ; CHECK-LABEL: @fmed3_qnan0_qnan1_qnan2_f32(
1508 ; CHECK-NEXT:    ret float 0x7FF8030000000000
1509 ;
1510   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float 0x7FF8002000000000, float 0x7FF8030000000000)
1511   ret float %med3
1512 }
1513
1514 define float @fmed3_constant_src0_0_f32(float %x, float %y) {
       ; Constants (0.5, -1.0, 4.0): median 0.5 sits in the first operand; expected to fold to 0.5.
1515 ; CHECK-LABEL: @fmed3_constant_src0_0_f32(
1516 ; CHECK-NEXT:    ret float 5.000000e-01
1517 ;
1518   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float -1.0, float 4.0)
1519   ret float %med3
1520 }
1521
1522 define float @fmed3_constant_src0_1_f32(float %x, float %y) {
       ; Constants (0.5, 4.0, -1.0): median 0.5 sits in the first operand; expected to fold to 0.5.
1523 ; CHECK-LABEL: @fmed3_constant_src0_1_f32(
1524 ; CHECK-NEXT:    ret float 5.000000e-01
1525 ;
1526   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float 4.0, float -1.0)
1527   ret float %med3
1528 }
1529
1530 define float @fmed3_constant_src1_0_f32(float %x, float %y) {
       ; Constants (-1.0, 0.5, 4.0): median 0.5 sits in the second operand; expected to fold to 0.5.
1531 ; CHECK-LABEL: @fmed3_constant_src1_0_f32(
1532 ; CHECK-NEXT:    ret float 5.000000e-01
1533 ;
1534   %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 0.5, float 4.0)
1535   ret float %med3
1536 }
1537
1538 define float @fmed3_constant_src1_1_f32(float %x, float %y) {
       ; Constants (4.0, 0.5, -1.0): median 0.5 sits in the second operand; expected to fold to 0.5.
1539 ; CHECK-LABEL: @fmed3_constant_src1_1_f32(
1540 ; CHECK-NEXT:    ret float 5.000000e-01
1541 ;
1542   %med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float 0.5, float -1.0)
1543   ret float %med3
1544 }
1545
1546 define float @fmed3_constant_src2_0_f32(float %x, float %y) {
       ; Constants (-1.0, 4.0, 0.5): median 0.5 sits in the third operand; expected to fold to 0.5.
1547 ; CHECK-LABEL: @fmed3_constant_src2_0_f32(
1548 ; CHECK-NEXT:    ret float 5.000000e-01
1549 ;
1550   %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 4.0, float 0.5)
1551   ret float %med3
1552 }
1553
1554 define float @fmed3_constant_src2_1_f32(float %x, float %y) {
       ; Constants (4.0, -1.0, 0.5): median 0.5 sits in the third operand; expected to fold to 0.5.
1555 ; CHECK-LABEL: @fmed3_constant_src2_1_f32(
1556 ; CHECK-NEXT:    ret float 5.000000e-01
1557 ;
1558   %med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float -1.0, float 0.5)
1559   ret float %med3
1560 }
1561
1562 define float @fmed3_x_qnan0_qnan1_f32(float %x) {
       ; Variable first operand, NaN constants in the other two: expected to fold to %x.
1563 ; CHECK-LABEL: @fmed3_x_qnan0_qnan1_f32(
1564 ; CHECK-NEXT:    ret float [[X:%.*]]
1565 ;
1566   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8001000000000, float 0x7FF8002000000000)
1567   ret float %med3
1568 }
1569
1570 define float @fmed3_qnan0_x_qnan1_f32(float %x) {
       ; Variable second operand, NaN constants in the other two: expected to fold to %x.
1571 ; CHECK-LABEL: @fmed3_qnan0_x_qnan1_f32(
1572 ; CHECK-NEXT:    ret float [[X:%.*]]
1573 ;
1574   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float %x, float 0x7FF8002000000000)
1575   ret float %med3
1576 }
1577
1578 define float @fmed3_qnan0_qnan1_x_f32(float %x) {
       ; Variable third operand, NaN constants in the other two: expected to fold to %x.
1579 ; CHECK-LABEL: @fmed3_qnan0_qnan1_x_f32(
1580 ; CHECK-NEXT:    ret float [[X:%.*]]
1581 ;
1582   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0x7FF8002000000000, float %x)
1583   ret float %med3
1584 }
1585
1586 define float @fmed3_nan_0_1_f32() {
       ; NaN first operand with constants 0.0 and 1.0: expected to fold to 0.0 (the smaller constant).
1587 ; CHECK-LABEL: @fmed3_nan_0_1_f32(
1588 ; CHECK-NEXT:    ret float 0.000000e+00
1589 ;
1590   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0.0, float 1.0)
1591   ret float %med3
1592 }
1593
1594 define float @fmed3_0_nan_1_f32() {
       ; NaN second operand with constants 0.0 and 1.0: expected to fold to 0.0 (the smaller constant).
1595 ; CHECK-LABEL: @fmed3_0_nan_1_f32(
1596 ; CHECK-NEXT:    ret float 0.000000e+00
1597 ;
1598   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 0x7FF8001000000000, float 1.0)
1599   ret float %med
1600 }
1601
1602 define float @fmed3_0_1_nan_f32() {
       ; NaN third operand with constants 0.0 and 1.0: expected to fold to 1.0 (the larger constant).
1603 ; CHECK-LABEL: @fmed3_0_1_nan_f32(
1604 ; CHECK-NEXT:    ret float 1.000000e+00
1605 ;
1606   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8001000000000)
1607   ret float %med
1608 }
1609
; fmed3(undef, 0.0, 1.0): the expected output is the constant 0.0, the same
; result as the test with a NaN in operand 0.
1610 define float @fmed3_undef_0_1_f32() {
1611 ; CHECK-LABEL: @fmed3_undef_0_1_f32(
1612 ; CHECK-NEXT:    ret float 0.000000e+00
1613 ;
1614   %med3 = call float @llvm.amdgcn.fmed3.f32(float undef, float 0.0, float 1.0)
1615   ret float %med3
1616 }
1617
; fmed3(0.0, undef, 1.0): the expected output is the constant 0.0, the same
; result as the test with a NaN in operand 1.
1618 define float @fmed3_0_undef_1_f32() {
1619 ; CHECK-LABEL: @fmed3_0_undef_1_f32(
1620 ; CHECK-NEXT:    ret float 0.000000e+00
1621 ;
1622   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float undef, float 1.0)
1623   ret float %med
1624 }
1625
; fmed3(0.0, 1.0, undef): the expected output is the constant 1.0, the same
; result as the test with a NaN in operand 2.
1626 define float @fmed3_0_1_undef_f32() {
1627 ; CHECK-LABEL: @fmed3_0_1_undef_f32(
1628 ; CHECK-NEXT:    ret float 1.000000e+00
1629 ;
1630   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float undef)
1631   ret float %med
1632 }
1633
1634 ; --------------------------------------------------------------------
1635 ; llvm.amdgcn.icmp
1636 ; --------------------------------------------------------------------
1637
; Overloads of llvm.amdgcn.icmp that the tests below call directly: i32, i64
; and i1 operands, each producing an i64. The third operand is an immediate
; comparison code. The i16 overload only shows up in expected output, so it
; is not declared here.
1638 declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32 immarg) nounwind readnone convergent
1639 declare i64 @llvm.amdgcn.icmp.i64.i64(i64, i64, i32 immarg) nounwind readnone convergent
1640 declare i64 @llvm.amdgcn.icmp.i64.i1(i1, i1, i32 immarg) nounwind readnone convergent
1641
; Comparison codes 31 and 42 sit just below and just above the integer
; predicate codes used elsewhere in this file (32..41). The expected output
; keeps both calls and the 'or' exactly as written.
1642 define i64 @invalid_icmp_code(i32 %a, i32 %b) {
1643 ; CHECK-LABEL: @invalid_icmp_code(
1644 ; CHECK-NEXT:    [[UNDER:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 31)
1645 ; CHECK-NEXT:    [[OVER:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A]], i32 [[B]], i32 42)
1646 ; CHECK-NEXT:    [[OR:%.*]] = or i64 [[UNDER]], [[OVER]]
1647 ; CHECK-NEXT:    ret i64 [[OR]]
1648 ;
1649   %under = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 31)
1650   %over = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 42)
1651   %or = or i64 %under, %over
1652   ret i64 %or
1653 }
1654
; Constant operands with code 32 (eq): 9 == 8 is false, so the expected
; output is the constant 0.
1655 define i64 @icmp_constant_inputs_false() {
1656 ; CHECK-LABEL: @icmp_constant_inputs_false(
1657 ; CHECK-NEXT:    ret i64 0
1658 ;
1659   %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 32)
1660   ret i64 %result
1661 }
1662
; Constant operands with code 34 (ugt): 9 > 8 is true, so the expected output
; replaces the call with llvm.read_register on metadata !0.
; NOTE(review): !0 is defined outside this part of the file; presumably it
; names the exec mask register - confirm against the metadata at the end.
; This test also captures the ATTR12 attribute-group variable that a later
; test (fcmp_constant_inputs_true) reuses.
1663 define i64 @icmp_constant_inputs_true() {
1664 ; CHECK-LABEL: @icmp_constant_inputs_true(
1665 ; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata !0) [[ATTR12:#.*]]
1666 ; CHECK-NEXT:    ret i64 [[RESULT]]
1667 ;
1668   %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34)
1669   ret i64 %result
1670 }
1671
; Constant in operand 0 with code 40 (slt): the expected output moves the
; constant to operand 1 and swaps the predicate to 38 (sgt).
1672 define i64 @icmp_constant_to_rhs_slt(i32 %x) {
1673 ; CHECK-LABEL: @icmp_constant_to_rhs_slt(
1674 ; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[X:%.*]], i32 9, i32 38)
1675 ; CHECK-NEXT:    ret i64 [[RESULT]]
1676 ;
1677   %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 %x, i32 40)
1678   ret i64 %result
1679 }
1680
; zext(icmp eq %a, %b) compared ne (33) against 0: the expected output is a
; single icmp intrinsic on %a, %b with code 32 (eq).
1681 define i64 @fold_icmp_ne_0_zext_icmp_eq_i32(i32 %a, i32 %b) {
1682 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i32(
1683 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32)
1684 ; CHECK-NEXT:    ret i64 [[MASK]]
1685 ;
1686   %cmp = icmp eq i32 %a, %b
1687   %zext.cmp = zext i1 %cmp to i32
1688   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
1689   ret i64 %mask
1690 }
1691
; zext(icmp ne %a, %b) compared ne (33) against 0: the expected output is a
; single icmp intrinsic on %a, %b with code 33 (ne).
1692 define i64 @fold_icmp_ne_0_zext_icmp_ne_i32(i32 %a, i32 %b) {
1693 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ne_i32(
1694 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 33)
1695 ; CHECK-NEXT:    ret i64 [[MASK]]
1696 ;
1697   %cmp = icmp ne i32 %a, %b
1698   %zext.cmp = zext i1 %cmp to i32
1699   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
1700   ret i64 %mask
1701 }
1702
; zext(icmp sle %a, %b) compared ne (33) against 0: the expected output is a
; single icmp intrinsic on %a, %b with code 41 (sle).
1703 define i64 @fold_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) {
1704 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_sle_i32(
1705 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 41)
1706 ; CHECK-NEXT:    ret i64 [[MASK]]
1707 ;
1708   %cmp = icmp sle i32 %a, %b
1709   %zext.cmp = zext i1 %cmp to i32
1710   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
1711   ret i64 %mask
1712 }
1713
; The inner compare is on i64 while the intrinsic call is the i32 overload:
; the expected output switches to the i64 overload with code 34 (ugt).
1714 define i64 @fold_icmp_ne_0_zext_icmp_ugt_i64(i64 %a, i64 %b) {
1715 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ugt_i64(
1716 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]], i32 34)
1717 ; CHECK-NEXT:    ret i64 [[MASK]]
1718 ;
1719   %cmp = icmp ugt i64 %a, %b
1720   %zext.cmp = zext i1 %cmp to i32
1721   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
1722   ret i64 %mask
1723 }
1724
; Same pattern as the ugt_i64 test, but the constant 0 is operand 0 of the
; intrinsic and the zext is operand 1. The expected output is identical:
; the i64 overload on %a, %b with code 34 (ugt).
; NOTE(review): the function name says "ult" while the inner compare is
; "icmp ugt"; the name appears to be historical - confirm before renaming.
1725 define i64 @fold_icmp_ne_0_zext_icmp_ult_swap_i64(i64 %a, i64 %b) {
1726 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_swap_i64(
1727 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]], i32 34)
1728 ; CHECK-NEXT:    ret i64 [[MASK]]
1729 ;
1730   %cmp = icmp ugt i64 %a, %b
1731   %zext.cmp = zext i1 %cmp to i32
1732   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 0, i32 %zext.cmp, i32 33)
1733   ret i64 %mask
1734 }
1735
; The inner compare is a floating-point fcmp oeq: the expected output replaces
; the icmp intrinsic with llvm.amdgcn.fcmp on %a, %b with code 1 (oeq).
1736 define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f32(float %a, float %b) {
1737 ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f32(
1738 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 1)
1739 ; CHECK-NEXT:    ret i64 [[MASK]]
1740 ;
1741   %cmp = fcmp oeq float %a, %b
1742   %zext.cmp = zext i1 %cmp to i32
1743   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
1744   ret i64 %mask
1745 }
1746
; zext(fcmp une %a, %b) compared ne against 0: the expected output is
; llvm.amdgcn.fcmp on %a, %b with code 14 (une).
1747 define i64 @fold_icmp_ne_0_zext_fcmp_une_f32(float %a, float %b) {
1748 ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_une_f32(
1749 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 14)
1750 ; CHECK-NEXT:    ret i64 [[MASK]]
1751 ;
1752   %cmp = fcmp une float %a, %b
1753   %zext.cmp = zext i1 %cmp to i32
1754   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
1755   ret i64 %mask
1756 }
1757
; Double-precision variant: zext(fcmp olt %a, %b) compared ne against 0 is
; expected to become the f64 overload of llvm.amdgcn.fcmp with code 4 (olt).
1758 define i64 @fold_icmp_ne_0_zext_fcmp_olt_f64(double %a, double %b) {
1759 ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_olt_f64(
1760 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f64(double [[A:%.*]], double [[B:%.*]], i32 4)
1761 ; CHECK-NEXT:    ret i64 [[MASK]]
1762 ;
1763   %cmp = fcmp olt double %a, %b
1764   %zext.cmp = zext i1 %cmp to i32
1765   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
1766   ret i64 %mask
1767 }
1768
; Same as the zext/eq test but the i1 is sign-extended: sext(icmp eq) compared
; ne against 0 is still expected to fold to the icmp intrinsic with code 32.
1769 define i64 @fold_icmp_sext_icmp_ne_0_i32(i32 %a, i32 %b) {
1770 ; CHECK-LABEL: @fold_icmp_sext_icmp_ne_0_i32(
1771 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32)
1772 ; CHECK-NEXT:    ret i64 [[MASK]]
1773 ;
1774   %cmp = icmp eq i32 %a, %b
1775   %sext.cmp = sext i1 %cmp to i32
1776   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 0, i32 33)
1777   ret i64 %mask
1778 }
1779
; zext(icmp eq %a, %b) compared eq (32) against 0 negates the inner compare:
; the expected output uses the inverse predicate, code 33 (ne).
1780 define i64 @fold_icmp_eq_0_zext_icmp_eq_i32(i32 %a, i32 %b) {
1781 ; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_eq_i32(
1782 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 33)
1783 ; CHECK-NEXT:    ret i64 [[MASK]]
1784 ;
1785   %cmp = icmp eq i32 %a, %b
1786   %zext.cmp = zext i1 %cmp to i32
1787   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32)
1788   ret i64 %mask
1789 }
1790
; zext(icmp slt %a, %b) compared eq against 0: the expected output uses the
; inverse predicate, code 39 (sge).
1791 define i64 @fold_icmp_eq_0_zext_icmp_slt_i32(i32 %a, i32 %b) {
1792 ; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_slt_i32(
1793 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 39)
1794 ; CHECK-NEXT:    ret i64 [[MASK]]
1795 ;
1796   %cmp = icmp slt i32 %a, %b
1797   %zext.cmp = zext i1 %cmp to i32
1798   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32)
1799   ret i64 %mask
1800 }
1801
; zext(fcmp oeq %a, %b) compared eq against 0: the expected output is
; llvm.amdgcn.fcmp with the inverse predicate, code 14 (une).
1802 define i64 @fold_icmp_eq_0_zext_fcmp_oeq_f32(float %a, float %b) {
1803 ; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_oeq_f32(
1804 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 14)
1805 ; CHECK-NEXT:    ret i64 [[MASK]]
1806 ;
1807   %cmp = fcmp oeq float %a, %b
1808   %zext.cmp = zext i1 %cmp to i32
1809   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32)
1810   ret i64 %mask
1811 }
1812
; zext(fcmp ule %a, %b) compared eq against 0: the expected output is
; llvm.amdgcn.fcmp with the inverse predicate, code 2 (ogt).
1813 define i64 @fold_icmp_eq_0_zext_fcmp_ule_f32(float %a, float %b) {
1814 ; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ule_f32(
1815 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 2)
1816 ; CHECK-NEXT:    ret i64 [[MASK]]
1817 ;
1818   %cmp = fcmp ule float %a, %b
1819   %zext.cmp = zext i1 %cmp to i32
1820   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32)
1821   ret i64 %mask
1822 }
1823
; zext(fcmp ogt %a, %b) compared eq against 0: the expected output is
; llvm.amdgcn.fcmp with the inverse predicate, code 13 (ule).
1824 define i64 @fold_icmp_eq_0_zext_fcmp_ogt_f32(float %a, float %b) {
1825 ; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ogt_f32(
1826 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 13)
1827 ; CHECK-NEXT:    ret i64 [[MASK]]
1828 ;
1829   %cmp = fcmp ogt float %a, %b
1830   %zext.cmp = zext i1 %cmp to i32
1831   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32)
1832   ret i64 %mask
1833 }
1834
; zext(icmp eq %a, %b) compared eq (32) against 1 is true exactly when the
; inner compare is true: the expected output keeps code 32 (eq) on %a, %b.
1835 define i64 @fold_icmp_zext_icmp_eq_1_i32(i32 %a, i32 %b) {
1836 ; CHECK-LABEL: @fold_icmp_zext_icmp_eq_1_i32(
1837 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32)
1838 ; CHECK-NEXT:    ret i64 [[MASK]]
1839 ;
1840   %cmp = icmp eq i32 %a, %b
1841   %zext.cmp = zext i1 %cmp to i32
1842   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 1, i32 32)
1843   ret i64 %mask
1844 }
1845
; The extended i1 is a plain argument, so there is no inner compare to fold
; into. The expected output keeps the zext and rewrites "eq 1" (code 32) as
; "ne 0" (code 33).
1846 define i64 @fold_icmp_zext_argi1_eq_1_i32(i1 %cond) {
1847 ; CHECK-LABEL: @fold_icmp_zext_argi1_eq_1_i32(
1848 ; CHECK-NEXT:    [[ZEXT_COND:%.*]] = zext i1 [[COND:%.*]] to i32
1849 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_COND]], i32 0, i32 33)
1850 ; CHECK-NEXT:    ret i64 [[MASK]]
1851 ;
1852   %zext.cond = zext i1 %cond to i32
1853   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cond, i32 1, i32 32)
1854   ret i64 %mask
1855 }
1856
; Negative test: a zero-extended i1 is compared eq against -1. The expected
; output leaves the zext and the call unchanged.
1857 define i64 @fold_icmp_zext_argi1_eq_neg1_i32(i1 %cond) {
1858 ; CHECK-LABEL: @fold_icmp_zext_argi1_eq_neg1_i32(
1859 ; CHECK-NEXT:    [[ZEXT_COND:%.*]] = zext i1 [[COND:%.*]] to i32
1860 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_COND]], i32 -1, i32 32)
1861 ; CHECK-NEXT:    ret i64 [[MASK]]
1862 ;
1863   %zext.cond = zext i1 %cond to i32
1864   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cond, i32 -1, i32 32)
1865   ret i64 %mask
1866 }
1867
; Negative test: a sign-extended i1 is compared eq against 1. The expected
; output leaves the sext and the call unchanged.
1868 define i64 @fold_icmp_sext_argi1_eq_1_i32(i1 %cond) {
1869 ; CHECK-LABEL: @fold_icmp_sext_argi1_eq_1_i32(
1870 ; CHECK-NEXT:    [[SEXT_COND:%.*]] = sext i1 [[COND:%.*]] to i32
1871 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[SEXT_COND]], i32 1, i32 32)
1872 ; CHECK-NEXT:    ret i64 [[MASK]]
1873 ;
1874   %sext.cond = sext i1 %cond to i32
1875   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cond, i32 1, i32 32)
1876   ret i64 %mask
1877 }
1878
; A sign-extended i1 argument compared eq against -1: the expected output
; keeps the sext and rewrites the comparison as "ne 0" (code 33).
1879 define i64 @fold_icmp_sext_argi1_eq_neg1_i32(i1 %cond) {
1880 ; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i32(
1881 ; CHECK-NEXT:    [[SEXT_COND:%.*]] = sext i1 [[COND:%.*]] to i32
1882 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[SEXT_COND]], i32 0, i32 33)
1883 ; CHECK-NEXT:    ret i64 [[MASK]]
1884 ;
1885   %sext.cond = sext i1 %cond to i32
1886   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cond, i32 -1, i32 32)
1887   ret i64 %mask
1888 }
1889
; i64 variant of the sext/eq -1 test: the expected output keeps the sext and
; rewrites the comparison as "ne 0" (code 33) on the i64 overload.
1890 define i64 @fold_icmp_sext_argi1_eq_neg1_i64(i1 %cond) {
1891 ; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i64(
1892 ; CHECK-NEXT:    [[SEXT_COND:%.*]] = sext i1 [[COND:%.*]] to i64
1893 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[SEXT_COND]], i64 0, i32 33)
1894 ; CHECK-NEXT:    ret i64 [[MASK]]
1895 ;
1896   %sext.cond = sext i1 %cond to i64
1897   %mask = call i64 @llvm.amdgcn.icmp.i64.i64(i64 %sext.cond, i64 -1, i32 32)
1898   ret i64 %mask
1899 }
1900
1901 ; TODO: Should be able to fold to false
; sext(icmp eq %a, %b) yields 0 or -1 and so never equals 1. The expected
; output currently records the missed fold: the compare, the sext and the
; call are all left in place.
1902 define i64 @fold_icmp_sext_icmp_eq_1_i32(i32 %a, i32 %b) {
1903 ; CHECK-LABEL: @fold_icmp_sext_icmp_eq_1_i32(
1904 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]]
1905 ; CHECK-NEXT:    [[SEXT_CMP:%.*]] = sext i1 [[CMP]] to i32
1906 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[SEXT_CMP]], i32 1, i32 32)
1907 ; CHECK-NEXT:    ret i64 [[MASK]]
1908 ;
1909   %cmp = icmp eq i32 %a, %b
1910   %sext.cmp = sext i1 %cmp to i32
1911   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 1, i32 32)
1912   ret i64 %mask
1913 }
1914
; sext(icmp eq %a, %b) compared eq against -1 is true exactly when the inner
; compare is true: the expected output is the icmp intrinsic with code 32.
1915 define i64 @fold_icmp_sext_icmp_eq_neg1_i32(i32 %a, i32 %b) {
1916 ; CHECK-LABEL: @fold_icmp_sext_icmp_eq_neg1_i32(
1917 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32)
1918 ; CHECK-NEXT:    ret i64 [[MASK]]
1919 ;
1920   %cmp = icmp eq i32 %a, %b
1921   %sext.cmp = sext i1 %cmp to i32
1922   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 -1, i32 32)
1923   ret i64 %mask
1924 }
1925
; sext(icmp sge %a, %b) compared eq against -1: the expected output is the
; icmp intrinsic on %a, %b with the inner predicate, code 39 (sge).
1926 define i64 @fold_icmp_sext_icmp_sge_neg1_i32(i32 %a, i32 %b) {
1927 ; CHECK-LABEL: @fold_icmp_sext_icmp_sge_neg1_i32(
1928 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 39)
1929 ; CHECK-NEXT:    ret i64 [[MASK]]
1930 ;
1931   %cmp = icmp sge i32 %a, %b
1932   %sext.cmp = sext i1 %cmp to i32
1933   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 -1, i32 32)
1934   ret i64 %mask
1935 }
1936
; The inner compare is negated with "xor true" before the zext. The expected
; output absorbs the negation into the predicate: sle becomes code 38 (sgt).
1937 define i64 @fold_not_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) {
1938 ; CHECK-LABEL: @fold_not_icmp_ne_0_zext_icmp_sle_i32(
1939 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 38)
1940 ; CHECK-NEXT:    ret i64 [[MASK]]
1941 ;
1942   %cmp = icmp sle i32 %a, %b
1943   %not = xor i1 %cmp, true
1944   %zext.cmp = zext i1 %not to i32
1945   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
1946   ret i64 %mask
1947 }
1948
; The inner compare is on i4. The expected output zero-extends both operands
; to i16 and uses the i16 overload of the icmp intrinsic with code 32 (eq).
1949 define i64 @fold_icmp_ne_0_zext_icmp_eq_i4(i4 %a, i4 %b) {
1950 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i4(
1951 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i4 [[A:%.*]] to i16
1952 ; CHECK-NEXT:    [[TMP2:%.*]] = zext i4 [[B:%.*]] to i16
1953 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 32)
1954 ; CHECK-NEXT:    ret i64 [[MASK]]
1955 ;
1956   %cmp = icmp eq i4 %a, %b
1957   %zext.cmp = zext i1 %cmp to i32
1958   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
1959   ret i64 %mask
1960 }
1961
; The inner compare is on i8. The expected output zero-extends both operands
; to i16 and uses the i16 overload of the icmp intrinsic with code 32 (eq).
1962 define i64 @fold_icmp_ne_0_zext_icmp_eq_i8(i8 %a, i8 %b) {
1963 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i8(
1964 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
1965 ; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[B:%.*]] to i16
1966 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 32)
1967 ; CHECK-NEXT:    ret i64 [[MASK]]
1968 ;
1969   %cmp = icmp eq i8 %a, %b
1970   %zext.cmp = zext i1 %cmp to i32
1971   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
1972   ret i64 %mask
1973 }
1974
; The inner compare is on i16. The expected output uses the i16 overload of
; the icmp intrinsic directly on %a, %b (no extension) with code 32 (eq).
1975 define i64 @fold_icmp_ne_0_zext_icmp_eq_i16(i16 %a, i16 %b) {
1976 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i16(
1977 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 32)
1978 ; CHECK-NEXT:    ret i64 [[MASK]]
1979 ;
1980   %cmp = icmp eq i16 %a, %b
1981   %zext.cmp = zext i1 %cmp to i32
1982   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
1983   ret i64 %mask
1984 }
1985
; The inner compare is on the odd width i36. The expected output zero-extends
; both operands to i64 and uses the i64 overload with code 32 (eq).
1986 define i64 @fold_icmp_ne_0_zext_icmp_eq_i36(i36 %a, i36 %b) {
1987 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i36(
1988 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i36 [[A:%.*]] to i64
1989 ; CHECK-NEXT:    [[TMP2:%.*]] = zext i36 [[B:%.*]] to i64
1990 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[TMP1]], i64 [[TMP2]], i32 32)
1991 ; CHECK-NEXT:    ret i64 [[MASK]]
1992 ;
1993   %cmp = icmp eq i36 %a, %b
1994   %zext.cmp = zext i1 %cmp to i32
1995   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
1996   ret i64 %mask
1997 }
1998
; Negative test: the inner compare is on i128. The expected output leaves the
; compare, the zext and the intrinsic call unchanged (no fold).
1999 define i64 @fold_icmp_ne_0_zext_icmp_eq_i128(i128 %a, i128 %b) {
2000 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i128(
2001 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i128 [[A:%.*]], [[B:%.*]]
2002 ; CHECK-NEXT:    [[ZEXT_CMP:%.*]] = zext i1 [[CMP]] to i32
2003 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_CMP]], i32 0, i32 33)
2004 ; CHECK-NEXT:    ret i64 [[MASK]]
2005 ;
2006   %cmp = icmp eq i128 %a, %b
2007   %zext.cmp = zext i1 %cmp to i32
2008   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
2009   ret i64 %mask
2010 }
2011
; Half-precision variant: zext(fcmp oeq %a, %b) compared ne against 0 is
; expected to become the f16 overload of llvm.amdgcn.fcmp with code 1 (oeq).
2012 define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f16(half %a, half %b) {
2013 ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f16(
2014 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f16(half [[A:%.*]], half [[B:%.*]], i32 1)
2015 ; CHECK-NEXT:    ret i64 [[MASK]]
2016 ;
2017   %cmp = fcmp oeq half %a, %b
2018   %zext.cmp = zext i1 %cmp to i32
2019   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
2020   ret i64 %mask
2021 }
2022
; Negative test: the inner compare is on fp128. The expected output leaves
; the fcmp, the zext and the intrinsic call unchanged (no fold).
2023 define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f128(fp128 %a, fp128 %b) {
2024 ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f128(
2025 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq fp128 [[A:%.*]], [[B:%.*]]
2026 ; CHECK-NEXT:    [[ZEXT_CMP:%.*]] = zext i1 [[CMP]] to i32
2027 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_CMP]], i32 0, i32 33)
2028 ; CHECK-NEXT:    ret i64 [[MASK]]
2029 ;
2030   %cmp = fcmp oeq fp128 %a, %b
2031   %zext.cmp = zext i1 %cmp to i32
2032   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
2033   ret i64 %mask
2034 }
2035
; Signed compare on i4: the expected output sign-extends (not zero-extends)
; both operands to i16 and uses the i16 overload with code 40 (slt).
2036 define i64 @fold_icmp_ne_0_zext_icmp_slt_i4(i4 %a, i4 %b) {
2037 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i4(
2038 ; CHECK-NEXT:    [[TMP1:%.*]] = sext i4 [[A:%.*]] to i16
2039 ; CHECK-NEXT:    [[TMP2:%.*]] = sext i4 [[B:%.*]] to i16
2040 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 40)
2041 ; CHECK-NEXT:    ret i64 [[MASK]]
2042 ;
2043   %cmp = icmp slt i4 %a, %b
2044   %zext.cmp = zext i1 %cmp to i32
2045   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
2046   ret i64 %mask
2047 }
2048
; Signed compare on i8: the expected output sign-extends both operands to
; i16 and uses the i16 overload with code 40 (slt).
2049 define i64 @fold_icmp_ne_0_zext_icmp_slt_i8(i8 %a, i8 %b) {
2050 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i8(
2051 ; CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[A:%.*]] to i16
2052 ; CHECK-NEXT:    [[TMP2:%.*]] = sext i8 [[B:%.*]] to i16
2053 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 40)
2054 ; CHECK-NEXT:    ret i64 [[MASK]]
2055 ;
2056   %cmp = icmp slt i8 %a, %b
2057   %zext.cmp = zext i1 %cmp to i32
2058   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
2059   ret i64 %mask
2060 }
2061
; Signed compare on i16: the expected output uses the i16 overload directly
; on %a, %b (no extension) with code 40 (slt).
2062 define i64 @fold_icmp_ne_0_zext_icmp_slt_i16(i16 %a, i16 %b) {
2063 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i16(
2064 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 40)
2065 ; CHECK-NEXT:    ret i64 [[MASK]]
2066 ;
2067   %cmp = icmp slt i16 %a, %b
2068   %zext.cmp = zext i1 %cmp to i32
2069   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
2070   ret i64 %mask
2071 }
2072
; Unsigned compare on i4: the expected output zero-extends both operands to
; i16 and uses the i16 overload with code 36 (ult).
2073 define i64 @fold_icmp_ne_0_zext_icmp_ult_i4(i4 %a, i4 %b) {
2074 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i4(
2075 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i4 [[A:%.*]] to i16
2076 ; CHECK-NEXT:    [[TMP2:%.*]] = zext i4 [[B:%.*]] to i16
2077 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 36)
2078 ; CHECK-NEXT:    ret i64 [[MASK]]
2079 ;
2080   %cmp = icmp ult i4 %a, %b
2081   %zext.cmp = zext i1 %cmp to i32
2082   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
2083   ret i64 %mask
2084 }
2085
; Unsigned compare on i8: the expected output zero-extends both operands to
; i16 and uses the i16 overload with code 36 (ult).
2086 define i64 @fold_icmp_ne_0_zext_icmp_ult_i8(i8 %a, i8 %b) {
2087 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i8(
2088 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
2089 ; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[B:%.*]] to i16
2090 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 36)
2091 ; CHECK-NEXT:    ret i64 [[MASK]]
2092 ;
2093   %cmp = icmp ult i8 %a, %b
2094   %zext.cmp = zext i1 %cmp to i32
2095   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
2096   ret i64 %mask
2097 }
2098
; Unsigned compare on i16: the expected output uses the i16 overload directly
; on %a, %b (no extension) with code 36 (ult).
2099 define i64 @fold_icmp_ne_0_zext_icmp_ult_i16(i16 %a, i16 %b) {
2100 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i16(
2101 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 36)
2102 ; CHECK-NEXT:    ret i64 [[MASK]]
2103 ;
2104   %cmp = icmp ult i16 %a, %b
2105   %zext.cmp = zext i1 %cmp to i32
2106   %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
2107   ret i64 %mask
2108 }
2109
2110 ; 1-bit NE comparisons
2111
; i1 overload: the result of icmp eq i32 is passed straight to the intrinsic
; (no zext/sext) and compared ne against false. The expected output is
; unchanged; the compare is not folded into the intrinsic.
2112 define i64 @fold_icmp_i1_ne_0_icmp_eq_i1(i32 %a, i32 %b) {
2113 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i1(
2114 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]]
2115 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2116 ; CHECK-NEXT:    ret i64 [[MASK]]
2117 ;
2118   %cmp = icmp eq i32 %a, %b
2119   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
2120   ret i64 %mask
2121 }
2122
; i1 overload fed by icmp ne i32, compared ne against false: the expected
; output is unchanged.
2123 define i64 @fold_icmp_i1_ne_0_icmp_ne_i1(i32 %a, i32 %b) {
2124 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ne_i1(
2125 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[A:%.*]], [[B:%.*]]
2126 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2127 ; CHECK-NEXT:    ret i64 [[MASK]]
2128 ;
2129   %cmp = icmp ne i32 %a, %b
2130   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
2131   ret i64 %mask
2132 }
2133
; i1 overload fed by icmp sle i32, compared ne against false: the expected
; output is unchanged.
2134 define i64 @fold_icmp_i1_ne_0_icmp_sle_i1(i32 %a, i32 %b) {
2135 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_sle_i1(
2136 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i32 [[A:%.*]], [[B:%.*]]
2137 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2138 ; CHECK-NEXT:    ret i64 [[MASK]]
2139 ;
2140   %cmp = icmp sle i32 %a, %b
2141   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
2142   ret i64 %mask
2143 }
2144
; i1 overload fed by icmp ugt i64, compared ne against false: the expected
; output is unchanged.
2145 define i64 @fold_icmp_i1_ne_0_icmp_ugt_i64(i64 %a, i64 %b) {
2146 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ugt_i64(
2147 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
2148 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2149 ; CHECK-NEXT:    ret i64 [[MASK]]
2150 ;
2151   %cmp = icmp ugt i64 %a, %b
2152   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
2153   ret i64 %mask
2154 }
2155
; i1 overload with the constant false in operand 0 and the compare result in
; operand 1. The expected output only moves the constant to operand 1 (code
; 33, ne, is kept); the compare itself is not folded into the intrinsic.
; NOTE(review): the function name says "ult" while the compare is "icmp ugt".
2156 define i64 @fold_icmp_i1_ne_0_icmp_ult_swap_i64(i64 %a, i64 %b) {
2157 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_swap_i64(
2158 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
2159 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2160 ; CHECK-NEXT:    ret i64 [[MASK]]
2161 ;
2162   %cmp = icmp ugt i64 %a, %b
2163   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 false, i1 %cmp, i32 33)
2164   ret i64 %mask
2165 }
2166
; i1 overload fed by fcmp oeq float, compared ne against false: the expected
; output is unchanged (no conversion to the fcmp intrinsic).
2167 define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f32(float %a, float %b) {
2168 ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f32(
2169 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq float [[A:%.*]], [[B:%.*]]
2170 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2171 ; CHECK-NEXT:    ret i64 [[MASK]]
2172 ;
2173   %cmp = fcmp oeq float %a, %b
2174   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
2175   ret i64 %mask
2176 }
2177
; i1 overload fed by fcmp une float, compared ne against false: the expected
; output is unchanged.
2178 define i64 @fold_icmp_i1_ne_0_fcmp_une_f32(float %a, float %b) {
2179 ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_une_f32(
2180 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp une float [[A:%.*]], [[B:%.*]]
2181 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2182 ; CHECK-NEXT:    ret i64 [[MASK]]
2183 ;
2184   %cmp = fcmp une float %a, %b
2185   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
2186   ret i64 %mask
2187 }
2188
; i1 overload fed by fcmp olt double, compared ne against false: the expected
; output is unchanged.
2189 define i64 @fold_icmp_i1_ne_0_fcmp_olt_f64(double %a, double %b) {
2190 ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_olt_f64(
2191 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
2192 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2193 ; CHECK-NEXT:    ret i64 [[MASK]]
2194 ;
2195   %cmp = fcmp olt double %a, %b
2196   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
2197   ret i64 %mask
2198 }
2199
; i1 overload fed by icmp eq i4, compared ne against false: the expected
; output is unchanged (the i4 operands are not widened).
2200 define i64 @fold_icmp_i1_ne_0_icmp_eq_i4(i4 %a, i4 %b) {
2201 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i4(
2202 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i4 [[A:%.*]], [[B:%.*]]
2203 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2204 ; CHECK-NEXT:    ret i64 [[MASK]]
2205 ;
2206   %cmp = icmp eq i4 %a, %b
2207   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
2208   ret i64 %mask
2209 }
2210
; i1 overload fed by icmp eq i8, compared ne against false: the expected
; output is unchanged.
2211 define i64 @fold_icmp_i1_ne_0_icmp_eq_i8(i8 %a, i8 %b) {
2212 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i8(
2213 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[A:%.*]], [[B:%.*]]
2214 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2215 ; CHECK-NEXT:    ret i64 [[MASK]]
2216 ;
2217   %cmp = icmp eq i8 %a, %b
2218   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
2219   ret i64 %mask
2220 }
2221
; i1 overload fed by icmp eq i16, compared ne against false: the expected
; output is unchanged.
2222 define i64 @fold_icmp_i1_ne_0_icmp_eq_i16(i16 %a, i16 %b) {
2223 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i16(
2224 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i16 [[A:%.*]], [[B:%.*]]
2225 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2226 ; CHECK-NEXT:    ret i64 [[MASK]]
2227 ;
2228   %cmp = icmp eq i16 %a, %b
2229   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
2230   ret i64 %mask
2231 }
2232
; i1 overload fed by icmp eq i36, compared ne against false: the expected
; output is unchanged.
2233 define i64 @fold_icmp_i1_ne_0_icmp_eq_i36(i36 %a, i36 %b) {
2234 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i36(
2235 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i36 [[A:%.*]], [[B:%.*]]
2236 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2237 ; CHECK-NEXT:    ret i64 [[MASK]]
2238 ;
2239   %cmp = icmp eq i36 %a, %b
2240   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
2241   ret i64 %mask
2242 }
2243
; i1 overload fed by icmp eq i128, compared ne against false: the expected
; output is unchanged.
2244 define i64 @fold_icmp_i1_ne_0_icmp_eq_i128(i128 %a, i128 %b) {
2245 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i128(
2246 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i128 [[A:%.*]], [[B:%.*]]
2247 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2248 ; CHECK-NEXT:    ret i64 [[MASK]]
2249 ;
2250   %cmp = icmp eq i128 %a, %b
2251   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
2252   ret i64 %mask
2253 }
2254
; i1 overload fed by fcmp oeq half, compared ne against false: the expected
; output is unchanged.
2255 define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f16(half %a, half %b) {
2256 ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f16(
2257 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq half [[A:%.*]], [[B:%.*]]
2258 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2259 ; CHECK-NEXT:    ret i64 [[MASK]]
2260 ;
2261   %cmp = fcmp oeq half %a, %b
2262   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
2263   ret i64 %mask
2264 }
2265
; i1 overload fed by fcmp oeq fp128, compared ne against false: the expected
; output is unchanged.
2266 define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f128(fp128 %a, fp128 %b) {
2267 ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f128(
2268 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq fp128 [[A:%.*]], [[B:%.*]]
2269 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2270 ; CHECK-NEXT:    ret i64 [[MASK]]
2271 ;
2272   %cmp = fcmp oeq fp128 %a, %b
2273   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
2274   ret i64 %mask
2275 }
2276
; i1 overload fed by icmp slt i4, compared ne against false: the expected
; output is unchanged.
2277 define i64 @fold_icmp_i1_ne_0_icmp_slt_i4(i4 %a, i4 %b) {
2278 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i4(
2279 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i4 [[A:%.*]], [[B:%.*]]
2280 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2281 ; CHECK-NEXT:    ret i64 [[MASK]]
2282 ;
2283   %cmp = icmp slt i4 %a, %b
2284   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
2285   ret i64 %mask
2286 }
2287
; i1 overload fed by icmp slt i8, compared ne against false: the expected
; output is unchanged.
2288 define i64 @fold_icmp_i1_ne_0_icmp_slt_i8(i8 %a, i8 %b) {
2289 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i8(
2290 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[A:%.*]], [[B:%.*]]
2291 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2292 ; CHECK-NEXT:    ret i64 [[MASK]]
2293 ;
2294   %cmp = icmp slt i8 %a, %b
2295   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
2296   ret i64 %mask
2297 }
2298
; i1 overload fed by icmp slt i16, compared ne against false: the expected
; output is unchanged.
2299 define i64 @fold_icmp_i1_ne_0_icmp_slt_i16(i16 %a, i16 %b) {
2300 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i16(
2301 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i16 [[A:%.*]], [[B:%.*]]
2302 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2303 ; CHECK-NEXT:    ret i64 [[MASK]]
2304 ;
2305   %cmp = icmp slt i16 %a, %b
2306   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
2307   ret i64 %mask
2308 }
2309
; i1 overload fed by icmp ult i4, compared ne against false: the expected
; output is unchanged.
2310 define i64 @fold_icmp_i1_ne_0_icmp_ult_i4(i4 %a, i4 %b) {
2311 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i4(
2312 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i4 [[A:%.*]], [[B:%.*]]
2313 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2314 ; CHECK-NEXT:    ret i64 [[MASK]]
2315 ;
2316   %cmp = icmp ult i4 %a, %b
2317   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
2318   ret i64 %mask
2319 }
2320
; i1 overload fed by icmp ult i8, compared ne against false: the expected
; output is unchanged.
2321 define i64 @fold_icmp_i1_ne_0_icmp_ult_i8(i8 %a, i8 %b) {
2322 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i8(
2323 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[A:%.*]], [[B:%.*]]
2324 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2325 ; CHECK-NEXT:    ret i64 [[MASK]]
2326 ;
2327   %cmp = icmp ult i8 %a, %b
2328   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
2329   ret i64 %mask
2330 }
2331
; i1 overload fed by icmp ult i16, compared ne against false: the expected
; output is unchanged.
2332 define i64 @fold_icmp_i1_ne_0_icmp_ult_i16(i16 %a, i16 %b) {
2333 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i16(
2334 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i16 [[A:%.*]], [[B:%.*]]
2335 ; CHECK-NEXT:    [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2336 ; CHECK-NEXT:    ret i64 [[MASK]]
2337 ;
2338   %cmp = icmp ult i16 %a, %b
2339   %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
2340   ret i64 %mask
2341 }
2342
2343 ; --------------------------------------------------------------------
2344 ; llvm.amdgcn.fcmp
2345 ; --------------------------------------------------------------------
2346
; f32 overload of llvm.amdgcn.fcmp, the only one the tests below call
; directly: two float operands plus an immediate comparison code, returning
; an i64.
2347 declare i64 @llvm.amdgcn.fcmp.i64.f32(float, float, i32 immarg) nounwind readnone convergent
2348
; Comparison codes -1 and 16 sit just below and just above the floating-point
; predicate codes used elsewhere in this file (which stay within 0..15). The
; expected output keeps both calls and the 'or' exactly as written.
2349 define i64 @invalid_fcmp_code(float %a, float %b) {
2350 ; CHECK-LABEL: @invalid_fcmp_code(
2351 ; CHECK-NEXT:    [[UNDER:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 -1)
2352 ; CHECK-NEXT:    [[OVER:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A]], float [[B]], i32 16)
2353 ; CHECK-NEXT:    [[OR:%.*]] = or i64 [[UNDER]], [[OVER]]
2354 ; CHECK-NEXT:    ret i64 [[OR]]
2355 ;
2356   %under = call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 -1)
2357   %over = call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 16)
2358   %or = or i64 %under, %over
2359   ret i64 %or
2360 }
2361
; Constant operands 2.0 and 4.0 with predicate code 1 (FCMP_OEQ in
; llvm::CmpInst::Predicate): the comparison is false, and the whole call is
; expected to fold to the constant 0.
2362 define i64 @fcmp_constant_inputs_false() {
2363 ; CHECK-LABEL: @fcmp_constant_inputs_false(
2364 ; CHECK-NEXT:    ret i64 0
2365 ;
2366   %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 1)
2367   ret i64 %result
2368 }
2369
; Constant operands 2.0 and 4.0 with predicate code 4 (FCMP_OLT): the
; comparison is true, and the call is expected to be replaced by
; llvm.read_register.i64 on metadata !0.
; NOTE(review): !0 is defined outside this excerpt; presumably it names the
; exec mask register - confirm against the metadata at the end of the file.
2370 define i64 @fcmp_constant_inputs_true() {
2371 ; CHECK-LABEL: @fcmp_constant_inputs_true(
2372 ; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata !0) [[ATTR12]]
2373 ; CHECK-NEXT:    ret i64 [[RESULT]]
2374 ;
2375   %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4)
2376   ret i64 %result
2377 }
2378
; Input has the constant 4.0 as the first operand with predicate code 4
; (FCMP_OLT). The expected output moves the constant to the second operand and
; swaps the predicate to code 2 (FCMP_OGT).
2379 define i64 @fcmp_constant_to_rhs_olt(float %x) {
2380 ; CHECK-LABEL: @fcmp_constant_to_rhs_olt(
2381 ; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[X:%.*]], float 4.000000e+00, i32 2)
2382 ; CHECK-NEXT:    ret i64 [[RESULT]]
2383 ;
2384   %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 4.0, float %x, i32 4)
2385   ret i64 %result
2386 }
2387
2388 ; --------------------------------------------------------------------
2389 ; llvm.amdgcn.ballot
2390 ; --------------------------------------------------------------------
2391
2392 declare i64 @llvm.amdgcn.ballot.i64(i1) nounwind readnone convergent
2393 declare i32 @llvm.amdgcn.ballot.i32(i1) nounwind readnone convergent
2394
; No-change test: a 64-bit ballot of a non-constant i1 argument is expected to
; remain as written.
2395 define i64 @ballot_nocombine_64(i1 %i) {
2396 ; CHECK-LABEL: @ballot_nocombine_64(
2397 ; CHECK-NEXT:    [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[I:%.*]])
2398 ; CHECK-NEXT:    ret i64 [[B]]
2399 ;
2400   %b = call i64 @llvm.amdgcn.ballot.i64(i1 %i)
2401   ret i64 %b
2402 }
2403
; A 64-bit ballot of constant false (i1 0) is expected to fold to the
; constant 0.
2404 define i64 @ballot_zero_64() {
2405 ; CHECK-LABEL: @ballot_zero_64(
2406 ; CHECK-NEXT:    ret i64 0
2407 ;
2408   %b = call i64 @llvm.amdgcn.ballot.i64(i1 0)
2409   ret i64 %b
2410 }
2411
; A 64-bit ballot of constant true (i1 1) is expected to be replaced by
; llvm.read_register.i64 on metadata !0.
; NOTE(review): !0 is defined outside this excerpt; presumably the exec mask
; register - confirm against the metadata at the end of the file.
2412 define i64 @ballot_one_64() {
2413 ; CHECK-LABEL: @ballot_one_64(
2414 ; CHECK-NEXT:    [[B:%.*]] = call i64 @llvm.read_register.i64(metadata !0) [[ATTR12]]
2415 ; CHECK-NEXT:    ret i64 [[B]]
2416 ;
2417   %b = call i64 @llvm.amdgcn.ballot.i64(i1 1)
2418   ret i64 %b
2419 }
2420
; No-change test: a 32-bit ballot of a non-constant i1 argument is expected to
; remain as written.
2421 define i32 @ballot_nocombine_32(i1 %i) {
2422 ; CHECK-LABEL: @ballot_nocombine_32(
2423 ; CHECK-NEXT:    [[B:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[I:%.*]])
2424 ; CHECK-NEXT:    ret i32 [[B]]
2425 ;
2426   %b = call i32 @llvm.amdgcn.ballot.i32(i1 %i)
2427   ret i32 %b
2428 }
2429
; A 32-bit ballot of constant false (i1 0) is expected to fold to the
; constant 0.
2430 define i32 @ballot_zero_32() {
2431 ; CHECK-LABEL: @ballot_zero_32(
2432 ; CHECK-NEXT:    ret i32 0
2433 ;
2434   %b = call i32 @llvm.amdgcn.ballot.i32(i1 0)
2435   ret i32 %b
2436 }
2437
; A 32-bit ballot of constant true (i1 1) is expected to be replaced by
; llvm.read_register.i32 on metadata !1 (a different metadata node from the
; one used by the 64-bit variant).
; NOTE(review): !1 is defined outside this excerpt; presumably the low half of
; the exec mask - confirm against the metadata at the end of the file.
2438 define i32 @ballot_one_32() {
2439 ; CHECK-LABEL: @ballot_one_32(
2440 ; CHECK-NEXT:    [[B:%.*]] = call i32 @llvm.read_register.i32(metadata !1) [[ATTR12]]
2441 ; CHECK-NEXT:    ret i32 [[B]]
2442 ;
2443   %b = call i32 @llvm.amdgcn.ballot.i32(i1 1)
2444   ret i32 %b
2445 }
2446
2447 ; --------------------------------------------------------------------
2448 ; llvm.amdgcn.wqm.vote
2449 ; --------------------------------------------------------------------
2450
2451 declare i1 @llvm.amdgcn.wqm.vote(i1)
2452
; wqm.vote of constant true drives a select between 1.0 and 0.0; the expected
; output returns 1.0 directly, with the call and the select both gone.
2453 define float @wqm_vote_true() {
2454 ; CHECK-LABEL: @wqm_vote_true(
2455 ; CHECK-NEXT:  main_body:
2456 ; CHECK-NEXT:    ret float 1.000000e+00
2457 ;
2458 main_body:
2459   %w = call i1 @llvm.amdgcn.wqm.vote(i1 true)
2460   %r = select i1 %w, float 1.0, float 0.0
2461   ret float %r
2462 }
2463
; wqm.vote of constant false drives a select between 1.0 and 0.0; the expected
; output returns 0.0 directly, with the call and the select both gone.
2464 define float @wqm_vote_false() {
2465 ; CHECK-LABEL: @wqm_vote_false(
2466 ; CHECK-NEXT:  main_body:
2467 ; CHECK-NEXT:    ret float 0.000000e+00
2468 ;
2469 main_body:
2470   %w = call i1 @llvm.amdgcn.wqm.vote(i1 false)
2471   %r = select i1 %w, float 1.0, float 0.0
2472   ret float %r
2473 }
2474
; wqm.vote of undef drives a select between 1.0 and 0.0; the expected output
; returns 0.0, i.e. the same result as the constant-false case.
2475 define float @wqm_vote_undef() {
2476 ; CHECK-LABEL: @wqm_vote_undef(
2477 ; CHECK-NEXT:  main_body:
2478 ; CHECK-NEXT:    ret float 0.000000e+00
2479 ;
2480 main_body:
2481   %w = call i1 @llvm.amdgcn.wqm.vote(i1 undef)
2482   %r = select i1 %w, float 1.0, float 0.0
2483   ret float %r
2484 }
2485
2486 ; --------------------------------------------------------------------
2487 ; llvm.amdgcn.kill
2488 ; --------------------------------------------------------------------
2489
2490 declare void @llvm.amdgcn.kill(i1)
2491
; llvm.amdgcn.kill with a constant true argument is expected to be removed
; entirely, leaving only the `ret void`.
2492 define void @kill_true() {
2493 ; CHECK-LABEL: @kill_true(
2494 ; CHECK-NEXT:    ret void
2495 ;
2496   call void @llvm.amdgcn.kill(i1 true)
2497   ret void
2498 }
2499
2500 ; --------------------------------------------------------------------
2501 ; llvm.amdgcn.readfirstlane
2502 ; --------------------------------------------------------------------
2503
2504 declare i32 @llvm.amdgcn.readfirstlane(i32)
2505
; Global whose address is used, via a `ptrtoint` constant expression, as a
; constant operand in @readfirstlane_constant and @readlane_constant.
2506 @gv = constant i32 0
2507
; readfirstlane on five kinds of operand, each result stored with a volatile
; store. Expected output: the call on the non-constant %arg is kept, while the
; calls on 0, 123, the ptrtoint constant expression and undef are removed and
; the operand itself is stored directly.
2508 define amdgpu_kernel void @readfirstlane_constant(i32 %arg) {
2509 ; CHECK-LABEL: @readfirstlane_constant(
2510 ; CHECK-NEXT:    [[VAR:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
2511 ; CHECK-NEXT:    store volatile i32 [[VAR]], i32* undef, align 4
2512 ; CHECK-NEXT:    store volatile i32 0, i32* undef, align 4
2513 ; CHECK-NEXT:    store volatile i32 123, i32* undef, align 4
2514 ; CHECK-NEXT:    store volatile i32 ptrtoint (i32* @gv to i32), i32* undef, align 4
2515 ; CHECK-NEXT:    store volatile i32 undef, i32* undef, align 4
2516 ; CHECK-NEXT:    ret void
2517 ;
2518   %var = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
2519   %zero = call i32 @llvm.amdgcn.readfirstlane(i32 0)
2520   %imm = call i32 @llvm.amdgcn.readfirstlane(i32 123)
2521   %constexpr = call i32 @llvm.amdgcn.readfirstlane(i32 ptrtoint (i32* @gv to i32))
2522   %undef = call i32 @llvm.amdgcn.readfirstlane(i32 undef)
2523   store volatile i32 %var, i32* undef
2524   store volatile i32 %zero, i32* undef
2525   store volatile i32 %imm, i32* undef
2526   store volatile i32 %constexpr, i32* undef
2527   store volatile i32 %undef, i32* undef
2528   ret void
2529 }
2530
; A chain of three readfirstlane calls, each consuming the previous result in
; the same block, is expected to collapse to the first call only.
2531 define i32 @readfirstlane_idempotent(i32 %arg) {
2532 ; CHECK-LABEL: @readfirstlane_idempotent(
2533 ; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
2534 ; CHECK-NEXT:    ret i32 [[READ0]]
2535 ;
2536   %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
2537   %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
2538   %read2 = call i32 @llvm.amdgcn.readfirstlane(i32 %read1)
2539   ret i32 %read2
2540 }
2541
; A readlane (lane 0) applied to the result of a readfirstlane in the same
; block is expected to be removed, returning the readfirstlane result directly.
2542 define i32 @readfirstlane_readlane(i32 %arg) {
2543 ; CHECK-LABEL: @readfirstlane_readlane(
2544 ; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
2545 ; CHECK-NEXT:    ret i32 [[READ0]]
2546 ;
2547   %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
2548   %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
2549   ret i32 %read1
2550 }
2551
; No-change test: the second readfirstlane consumes the first one's result but
; sits in a different basic block (bb1 vs bb0). Both calls are expected to
; remain.
2552 define i32 @readfirstlane_readfirstlane_different_block(i32 %arg) {
2553 ; CHECK-LABEL: @readfirstlane_readfirstlane_different_block(
2554 ; CHECK-NEXT:  bb0:
2555 ; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
2556 ; CHECK-NEXT:    br label [[BB1:%.*]]
2557 ; CHECK:       bb1:
2558 ; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[READ0]])
2559 ; CHECK-NEXT:    ret i32 [[READ1]]
2560 ;
2561 bb0:
2562   %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
2563   br label %bb1
2564
2565 bb1:
2566   %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
2567   ret i32 %read1
2568 }
2569
; No-change test: a readfirstlane in bb1 consumes a readlane (lane 0) result
; produced in bb0. Both calls are expected to remain.
2570 define i32 @readfirstlane_readlane_different_block(i32 %arg) {
2571 ; CHECK-LABEL: @readfirstlane_readlane_different_block(
2572 ; CHECK-NEXT:  bb0:
2573 ; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 0)
2574 ; CHECK-NEXT:    br label [[BB1:%.*]]
2575 ; CHECK:       bb1:
2576 ; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[READ0]])
2577 ; CHECK-NEXT:    ret i32 [[READ1]]
2578 ;
2579 bb0:
2580   %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 0)
2581   br label %bb1
2582
2583 bb1:
2584   %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
2585   ret i32 %read1
2586 }
2587
2588 ; --------------------------------------------------------------------
2589 ; llvm.amdgcn.readlane
2590 ; --------------------------------------------------------------------
2591
2592 declare i32 @llvm.amdgcn.readlane(i32, i32)
2593
; readlane on five kinds of value operand, each result stored with a volatile
; store. Expected output: the call on the non-constant %arg (lane 7) is kept,
; while the calls whose value operand is 0, 123, the ptrtoint constant
; expression or undef are removed and that operand is stored directly, even
; though their lane operand (%lane) is not constant.
2594 define amdgpu_kernel void @readlane_constant(i32 %arg, i32 %lane) {
2595 ; CHECK-LABEL: @readlane_constant(
2596 ; CHECK-NEXT:    [[VAR:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 7)
2597 ; CHECK-NEXT:    store volatile i32 [[VAR]], i32* undef, align 4
2598 ; CHECK-NEXT:    store volatile i32 0, i32* undef, align 4
2599 ; CHECK-NEXT:    store volatile i32 123, i32* undef, align 4
2600 ; CHECK-NEXT:    store volatile i32 ptrtoint (i32* @gv to i32), i32* undef, align 4
2601 ; CHECK-NEXT:    store volatile i32 undef, i32* undef, align 4
2602 ; CHECK-NEXT:    ret void
2603 ;
2604   %var = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 7)
2605   %zero = call i32 @llvm.amdgcn.readlane(i32 0, i32 %lane)
2606   %imm = call i32 @llvm.amdgcn.readlane(i32 123, i32 %lane)
2607   %constexpr = call i32 @llvm.amdgcn.readlane(i32 ptrtoint (i32* @gv to i32), i32 %lane)
2608   %undef = call i32 @llvm.amdgcn.readlane(i32 undef, i32 %lane)
2609   store volatile i32 %var, i32* undef
2610   store volatile i32 %zero, i32* undef
2611   store volatile i32 %imm, i32* undef
2612   store volatile i32 %constexpr, i32* undef
2613   store volatile i32 %undef, i32* undef
2614   ret void
2615 }
2616
; Two readlane calls with the same %lane operand, the second consuming the
; first, in one block: the expected output keeps only the first call.
2617 define i32 @readlane_idempotent(i32 %arg, i32 %lane) {
2618 ; CHECK-LABEL: @readlane_idempotent(
2619 ; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE:%.*]])
2620 ; CHECK-NEXT:    ret i32 [[READ0]]
2621 ;
2622   %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane)
2623   %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane)
2624   ret i32 %read1
2625 }
2626
; No-change test: two chained readlane calls use different lane operands
; (%lane0 and %lane1). Both calls are expected to remain.
2627 define i32 @readlane_idempotent_different_lanes(i32 %arg, i32 %lane0, i32 %lane1) {
2628 ; CHECK-LABEL: @readlane_idempotent_different_lanes(
2629 ; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE0:%.*]])
2630 ; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 [[LANE1:%.*]])
2631 ; CHECK-NEXT:    ret i32 [[READ1]]
2632 ;
2633   %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane0)
2634   %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane1)
2635   ret i32 %read1
2636 }
2637
; A readlane (lane 0) consuming a readfirstlane result in the same block is
; expected to be removed, returning the readfirstlane result directly. The
; input body matches @readfirstlane_readlane.
2638 define i32 @readlane_readfirstlane(i32 %arg) {
2639 ; CHECK-LABEL: @readlane_readfirstlane(
2640 ; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
2641 ; CHECK-NEXT:    ret i32 [[READ0]]
2642 ;
2643   %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
2644   %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
2645   ret i32 %read1
2646 }
2647
; No-change test: two readlane calls share the same %lane operand, but the
; second sits in bb1 while the first is in bb0. Both calls are expected to
; remain.
2648 define i32 @readlane_idempotent_different_block(i32 %arg, i32 %lane) {
2649 ; CHECK-LABEL: @readlane_idempotent_different_block(
2650 ; CHECK-NEXT:  bb0:
2651 ; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE:%.*]])
2652 ; CHECK-NEXT:    br label [[BB1:%.*]]
2653 ; CHECK:       bb1:
2654 ; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 [[LANE]])
2655 ; CHECK-NEXT:    ret i32 [[READ1]]
2656 ;
2657 bb0:
2658   %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane)
2659   br label %bb1
2660
2661 bb1:
2662   %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane)
2663   ret i32 %read1
2664 }
2665
2666
; No-change test: a readlane (lane 0) in bb1 consumes a readfirstlane result
; produced in bb0. Both calls are expected to remain.
2667 define i32 @readlane_readfirstlane_different_block(i32 %arg) {
2668 ; CHECK-LABEL: @readlane_readfirstlane_different_block(
2669 ; CHECK-NEXT:  bb0:
2670 ; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
2671 ; CHECK-NEXT:    br label [[BB1:%.*]]
2672 ; CHECK:       bb1:
2673 ; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 0)
2674 ; CHECK-NEXT:    ret i32 [[READ1]]
2675 ;
2676 bb0:
2677   %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
2678   br label %bb1
2679
2680 bb1:
2681   %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
2682   ret i32 %read1
2683 }
2684
2685 ; --------------------------------------------------------------------
2686 ; llvm.amdgcn.update.dpp.i32
2687 ; --------------------------------------------------------------------
2688
2689 declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1)
2690
; No-change test: update.dpp with trailing immediates (1, 1, 1, false) is
; expected to keep both %in1 and %in2 as operands.
2691 define amdgpu_kernel void @update_dpp_no_combine(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
2692 ; CHECK-LABEL: @update_dpp_no_combine(
2693 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 [[IN1:%.*]], i32 [[IN2:%.*]], i32 1, i32 1, i32 1, i1 false)
2694 ; CHECK-NEXT:    store i32 [[TMP0]], i32 addrspace(1)* [[OUT:%.*]], align 4
2695 ; CHECK-NEXT:    ret void
2696 ;
2697   %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 0)
2698   store i32 %tmp0, i32 addrspace(1)* %out
2699   ret void
2700 }
2701
; update.dpp with trailing immediates (3, 15, 15, true): the first operand
; (%in1, the "old" value per the test name) is expected to be replaced by
; undef, while %in2 and the immediates are kept.
2702 define amdgpu_kernel void @update_dpp_drop_old(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
2703 ; CHECK-LABEL: @update_dpp_drop_old(
2704 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[IN2:%.*]], i32 3, i32 15, i32 15, i1 true)
2705 ; CHECK-NEXT:    store i32 [[TMP0]], i32 addrspace(1)* [[OUT:%.*]], align 4
2706 ; CHECK-NEXT:    ret void
2707 ;
2708   %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 3, i32 15, i32 15, i1 1)
2709   store i32 %tmp0, i32 addrspace(1)* %out
2710   ret void
2711 }
2712
; update.dpp whose first operand is already undef, with trailing immediates
; (4, 15, 15, true): the call is expected to remain as written.
2713 define amdgpu_kernel void @update_dpp_undef_old(i32 addrspace(1)* %out, i32 %in1) {
2714 ; CHECK-LABEL: @update_dpp_undef_old(
2715 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[IN1:%.*]], i32 4, i32 15, i32 15, i1 true)
2716 ; CHECK-NEXT:    store i32 [[TMP0]], i32 addrspace(1)* [[OUT:%.*]], align 4
2717 ; CHECK-NEXT:    ret void
2718 ;
2719   %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in1, i32 4, i32 15, i32 15, i1 1)
2720   store i32 %tmp0, i32 addrspace(1)* %out
2721   ret void
2722 }
2723
2724
2725 ; --------------------------------------------------------------------
2726 ; llvm.amdgcn.permlane16
2727 ; --------------------------------------------------------------------
2728
2729 declare i32 @llvm.amdgcn.permlane16(i32, i32, i32, i32, i1 immarg, i1 immarg)
2730
; No-change test: permlane16 with both trailing i1 immediates false keeps its
; constant first operand (12345).
2731 define amdgpu_kernel void @permlane16(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
2732 ; CHECK-LABEL: @permlane16(
2733 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 false)
2734 ; CHECK-NEXT:    store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
2735 ; CHECK-NEXT:    ret void
2736 ;
2737   %res = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
2738   store i32 %res, i32 addrspace(1)* %out
2739   ret void
2740 }
2741
; permlane16 with the last i1 immediate true (bound_ctrl per the test name):
; the constant first operand 12345 is expected to be replaced by undef.
2742 define amdgpu_kernel void @permlane16_bound_ctrl(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
2743 ; CHECK-LABEL: @permlane16_bound_ctrl(
2744 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlane16(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 true)
2745 ; CHECK-NEXT:    store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
2746 ; CHECK-NEXT:    ret void
2747 ;
2748   %res = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 true)
2749   store i32 %res, i32 addrspace(1)* %out
2750   ret void
2751 }
2752
; permlane16 with both trailing i1 immediates true (fetch_invalid and
; bound_ctrl per the test name): the constant first operand 12345 is expected
; to be replaced by undef.
2753 define amdgpu_kernel void @permlane16_fetch_invalid_bound_ctrl(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
2754 ; CHECK-LABEL: @permlane16_fetch_invalid_bound_ctrl(
2755 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlane16(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 true, i1 true)
2756 ; CHECK-NEXT:    store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
2757 ; CHECK-NEXT:    ret void
2758 ;
2759   %res = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 true)
2760   store i32 %res, i32 addrspace(1)* %out
2761   ret void
2762 }
2763
2764 ; --------------------------------------------------------------------
2765 ; llvm.amdgcn.permlanex16
2766 ; --------------------------------------------------------------------
2767
2768 declare i32 @llvm.amdgcn.permlanex16(i32, i32, i32, i32, i1 immarg, i1 immarg)
2769
; No-change test: permlanex16 with both trailing i1 immediates false keeps its
; constant first operand (12345).
2770 define amdgpu_kernel void @permlanex16(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
2771 ; CHECK-LABEL: @permlanex16(
2772 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 false)
2773 ; CHECK-NEXT:    store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
2774 ; CHECK-NEXT:    ret void
2775 ;
2776   %res = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
2777   store i32 %res, i32 addrspace(1)* %out
2778   ret void
2779 }
2780
; permlanex16 with the last i1 immediate true (bound_ctrl per the test name):
; the constant first operand 12345 is expected to be replaced by undef.
2781 define amdgpu_kernel void @permlanex16_bound_ctrl(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
2782 ; CHECK-LABEL: @permlanex16_bound_ctrl(
2783 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 true)
2784 ; CHECK-NEXT:    store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
2785 ; CHECK-NEXT:    ret void
2786 ;
2787   %res = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 true)
2788   store i32 %res, i32 addrspace(1)* %out
2789   ret void
2790 }
2791
; permlanex16 with both trailing i1 immediates true (fetch_invalid and
; bound_ctrl per the test name): the constant first operand 12345 is expected
; to be replaced by undef.
2792 define amdgpu_kernel void @permlanex16_fetch_invalid_bound_ctrl(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
2793 ; CHECK-LABEL: @permlanex16_fetch_invalid_bound_ctrl(
2794 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 true, i1 true)
2795 ; CHECK-NEXT:    store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
2796 ; CHECK-NEXT:    ret void
2797 ;
2798   %res = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 true)
2799   store i32 %res, i32 addrspace(1)* %out
2800   ret void
2801 }
2802
2803 ; --------------------------------------------------------------------
2804 ; llvm.amdgcn.image.sample a16
2805 ; --------------------------------------------------------------------
2806
2807 declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2808 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2809 declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2810 declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2811 declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2812 declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2813
2814 declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2815 declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2816 declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2817 declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2818 declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2819 declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2820
2821 declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2822 declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2823 declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2824 declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2825 declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2826 declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2827 declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2828 declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2829
2830 declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2831 declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2832 declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2833 declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2834 declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2835 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2836 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2837 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2838 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2839
2840 declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2841 declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2842 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2843 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2844 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2845 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2846 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2847 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2848
2849 declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2850 declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2851 declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2852 declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2853
2854 declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2855 declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2856 declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2857 declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2858
2859 declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2860 declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2861
; The single coordinate %s is a half extended to float before an f32
; image.sample.1d call. The expected output calls the .f16 overload with the
; half value directly, with no fpext remaining.
2862 define amdgpu_kernel void @image_sample_a16_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
2863 ; CHECK-LABEL: @image_sample_a16_1d(
2864 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2865 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2866 ; CHECK-NEXT:    ret void
2867 ;
2868   %s32 = fpext half %s to float
2869   %res = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2870   store <4 x float> %res, <4 x float> addrspace(1)* %out
2871   ret void
2872 }
2873
; Both coordinates (%s, %t) are halves extended to float before an f32
; image.sample.2d call. The expected output calls the .f16 overload with the
; half values directly, with no fpext remaining.
2874 define amdgpu_kernel void @image_sample_a16_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
2875 ; CHECK-LABEL: @image_sample_a16_2d(
2876 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2877 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2878 ; CHECK-NEXT:    ret void
2879 ;
2880   %s32 = fpext half %s to float
2881   %t32 = fpext half %t to float
2882   %res = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2883   store <4 x float> %res, <4 x float> addrspace(1)* %out
2884   ret void
2885 }
2886
; All three coordinates (%s, %t, %r) are halves extended to float before an
; f32 image.sample.3d call. The expected output calls the .f16 overload with
; the half values directly, with no fpext remaining.
2887 define amdgpu_kernel void @image_sample_a16_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
2888 ; CHECK-LABEL: @image_sample_a16_3d(
2889 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2890 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2891 ; CHECK-NEXT:    ret void
2892 ;
2893   %s32 = fpext half %s to float
2894   %t32 = fpext half %t to float
2895   %r32 = fpext half %r to float
2896   %res = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2897   store <4 x float> %res, <4 x float> addrspace(1)* %out
2898   ret void
2899 }
2900
; The cube coordinates (%s, %t, %face) are halves extended to float before an
; f32 image.sample.cube call. The expected output calls the .f16 overload with
; the half values directly, with no fpext remaining.
2901 define amdgpu_kernel void @image_sample_a16_cube(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
2902 ;
2903 ; CHECK-LABEL: @image_sample_a16_cube(
2904 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2905 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2906 ; CHECK-NEXT:    ret void
2907 ;
2908   %s32 = fpext half %s to float
2909   %t32 = fpext half %t to float
2910   %face32 = fpext half %face to float
2911   %res = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2912   store <4 x float> %res, <4 x float> addrspace(1)* %out
2913   ret void
2914 }
2915
; The coordinate %s and the array index %slice are halves extended to float
; before an f32 image.sample.1darray call. The expected output calls the .f16
; overload with the half values directly, with no fpext remaining.
2916 define amdgpu_kernel void @image_sample_a16_1darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
2917 ; CHECK-LABEL: @image_sample_a16_1darray(
2918 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2919 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2920 ; CHECK-NEXT:    ret void
2921 ;
2922   %s32 = fpext half %s to float
2923   %slice32 = fpext half %slice to float
2924   %res = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2925   store <4 x float> %res, <4 x float> addrspace(1)* %out
2926   ret void
2927 }
2928
; The coordinates (%s, %t) and the array index %slice are halves extended to
; float before an f32 image.sample.2darray call. The expected output calls the
; .f16 overload with the half values directly, with no fpext remaining.
2929 define amdgpu_kernel void @image_sample_a16_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
2930 ; CHECK-LABEL: @image_sample_a16_2darray(
2931 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2932 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2933 ; CHECK-NEXT:    ret void
2934 ;
2935   %s32 = fpext half %s to float
2936   %t32 = fpext half %t to float
2937   %slice32 = fpext half %slice to float
2938   %res = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2939   store <4 x float> %res, <4 x float> addrspace(1)* %out
2940   ret void
2941 }
2942
; image.sample.c.1d where %zcompare is already a float argument and only the
; coordinate %s is a half extended to float. The expected output calls the
; .f16 overload with %s passed as half, while %zcompare stays float.
2943 define amdgpu_kernel void @image_sample_a16_c_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
2944 ; CHECK-LABEL: @image_sample_a16_c_1d(
2945 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2946 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2947 ; CHECK-NEXT:    ret void
2948 ;
2949   %s32 = fpext half %s to float
2950   %res = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2951   store <4 x float> %res, <4 x float> addrspace(1)* %out
2952   ret void
2953 }
2954
; %s and %t are widened from half with fpext; %zcompare is already a float argument.
; Expected output: the .f16 overload of sample.c.2d taking %s and %t as half, with %zcompare still passed as float.
2955 define amdgpu_kernel void @image_sample_a16_c_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
2956 ; CHECK-LABEL: @image_sample_a16_c_2d(
2957 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2958 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2959 ; CHECK-NEXT:    ret void
2960 ;
2961   %s32 = fpext half %s to float
2962   %t32 = fpext half %t to float
2963   %res = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2964   store <4 x float> %res, <4 x float> addrspace(1)* %out
2965   ret void
2966 }
2967
; %s and %clamp are both widened from half with fpext before the .f32 sample.cl.1d call.
; Expected output: the .f16 overload taking both values as half, with no fpext left.
2968 define amdgpu_kernel void @image_sample_a16_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
2969 ; CHECK-LABEL: @image_sample_a16_cl_1d(
2970 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2971 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2972 ; CHECK-NEXT:    ret void
2973 ;
2974   %s32 = fpext half %s to float
2975   %clamp32 = fpext half %clamp to float
2976   %res = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2977   store <4 x float> %res, <4 x float> addrspace(1)* %out
2978   ret void
2979 }
2980
; %s, %t and %clamp are all widened from half with fpext before the .f32 sample.cl.2d call.
; Expected output: the .f16 overload taking all three values as half, with no fpext left.
2981 define amdgpu_kernel void @image_sample_a16_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
2982 ; CHECK-LABEL: @image_sample_a16_cl_2d(
2983 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2984 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2985 ; CHECK-NEXT:    ret void
2986 ;
2987   %s32 = fpext half %s to float
2988   %t32 = fpext half %t to float
2989   %clamp32 = fpext half %clamp to float
2990   %res = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2991   store <4 x float> %res, <4 x float> addrspace(1)* %out
2992   ret void
2993 }
2994
; %s and %clamp are widened from half with fpext; %zcompare is already a float argument.
; Expected output: the .f16 overload of sample.c.cl.1d taking %s and %clamp as half, with %zcompare still float.
2995 define amdgpu_kernel void @image_sample_a16_c_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
2996 ; CHECK-LABEL: @image_sample_a16_c_cl_1d(
2997 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2998 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2999 ; CHECK-NEXT:    ret void
3000 ;
3001   %s32 = fpext half %s to float
3002   %clamp32 = fpext half %clamp to float
3003   %res = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3004   store <4 x float> %res, <4 x float> addrspace(1)* %out
3005   ret void
3006 }
3007
; %s, %t and %clamp are widened from half with fpext; %zcompare is already a float argument.
; Expected output: the .f16 overload of sample.c.cl.2d taking the three half values directly, with %zcompare still float.
3008 define amdgpu_kernel void @image_sample_a16_c_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
3009 ; CHECK-LABEL: @image_sample_a16_c_cl_2d(
3010 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3011 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3012 ; CHECK-NEXT:    ret void
3013 ;
3014   %s32 = fpext half %s to float
3015   %t32 = fpext half %t to float
3016   %clamp32 = fpext half %clamp to float
3017   %res = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3018   store <4 x float> %res, <4 x float> addrspace(1)* %out
3019   ret void
3020 }
3021
; %bias is a float argument and only %s is widened from half with fpext.
; Expected output: %bias stays float and %s is passed as half, so the type suffix goes from .f32.f32 to .f32.f16.
3022 define amdgpu_kernel void @image_sample_a16_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {
3023 ; CHECK-LABEL: @image_sample_a16_b_1d(
3024 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3025 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3026 ; CHECK-NEXT:    ret void
3027 ;
3028   %s32 = fpext half %s to float
3029   %res = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3030   store <4 x float> %res, <4 x float> addrspace(1)* %out
3031   ret void
3032 }
3033
; %bias is a float argument; %s and %t are widened from half with fpext.
; Expected output: %bias stays float and %s, %t are passed as half, so the type suffix goes from .f32.f32 to .f32.f16.
3034 define amdgpu_kernel void @image_sample_a16_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
3035 ; CHECK-LABEL: @image_sample_a16_b_2d(
3036 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3037 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3038 ; CHECK-NEXT:    ret void
3039 ;
3040   %s32 = fpext half %s to float
3041   %t32 = fpext half %t to float
3042   %res = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3043   store <4 x float> %res, <4 x float> addrspace(1)* %out
3044   ret void
3045 }
3046
; %bias and %zcompare are float arguments; only %s is widened from half with fpext.
; Expected output: %bias and %zcompare stay float and %s is passed as half (suffix .f32.f32 becomes .f32.f16).
3047 define amdgpu_kernel void @image_sample_a16_c_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) {
3048 ; CHECK-LABEL: @image_sample_a16_c_b_1d(
3049 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3050 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3051 ; CHECK-NEXT:    ret void
3052 ;
3053   %s32 = fpext half %s to float
3054   %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3055   store <4 x float> %res, <4 x float> addrspace(1)* %out
3056   ret void
3057 }
3058
; %bias and %zcompare are float arguments; %s and %t are widened from half with fpext.
; Expected output: %bias and %zcompare stay float and %s, %t are passed as half (suffix .f32.f32 becomes .f32.f16).
3059 define amdgpu_kernel void @image_sample_a16_c_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
3060 ; CHECK-LABEL: @image_sample_a16_c_b_2d(
3061 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3062 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3063 ; CHECK-NEXT:    ret void
3064 ;
3065   %s32 = fpext half %s to float
3066   %t32 = fpext half %t to float
3067   %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3068   store <4 x float> %res, <4 x float> addrspace(1)* %out
3069   ret void
3070 }
3071
; %bias is a float argument; %s and %clamp are widened from half with fpext.
; Expected output: %bias stays float and %s, %clamp are passed as half (suffix .f32.f32 becomes .f32.f16).
3072 define amdgpu_kernel void @image_sample_a16_b_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) {
3073 ; CHECK-LABEL: @image_sample_a16_b_cl_1d(
3074 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3075 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3076 ; CHECK-NEXT:    ret void
3077 ;
3078   %s32 = fpext half %s to float
3079   %clamp32 = fpext half %clamp to float
3080   %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3081   store <4 x float> %res, <4 x float> addrspace(1)* %out
3082   ret void
3083 }
3084
; %bias is a float argument; %s, %t and %clamp are widened from half with fpext.
; Expected output: %bias stays float and the three half values are passed directly (suffix .f32.f32 becomes .f32.f16).
3085 define amdgpu_kernel void @image_sample_a16_b_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
3086 ; CHECK-LABEL: @image_sample_a16_b_cl_2d(
3087 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3088 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3089 ; CHECK-NEXT:    ret void
3090 ;
3091   %s32 = fpext half %s to float
3092   %t32 = fpext half %t to float
3093   %clamp32 = fpext half %clamp to float
3094   %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3095   store <4 x float> %res, <4 x float> addrspace(1)* %out
3096   ret void
3097 }
3098
; %bias and %zcompare are float arguments; %s and %clamp are widened from half with fpext.
; Expected output: %bias and %zcompare stay float and %s, %clamp are passed as half (suffix .f32.f32 becomes .f32.f16).
3099 define amdgpu_kernel void @image_sample_a16_c_b_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) {
3100 ; CHECK-LABEL: @image_sample_a16_c_b_cl_1d(
3101 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3102 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3103 ; CHECK-NEXT:    ret void
3104 ;
3105   %s32 = fpext half %s to float
3106   %clamp32 = fpext half %clamp to float
3107   %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3108   store <4 x float> %res, <4 x float> addrspace(1)* %out
3109   ret void
3110 }
3111
; %bias and %zcompare are float arguments; %s, %t and %clamp are widened from half with fpext.
; Expected output: the two float arguments stay float and the three half values are passed directly (suffix .f32.f32 becomes .f32.f16).
3112 define amdgpu_kernel void @image_sample_a16_c_b_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
3113 ; CHECK-LABEL: @image_sample_a16_c_b_cl_2d(
3114 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3115 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3116 ; CHECK-NEXT:    ret void
3117 ;
3118   %s32 = fpext half %s to float
3119   %t32 = fpext half %t to float
3120   %clamp32 = fpext half %clamp to float
3121   %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3122   store <4 x float> %res, <4 x float> addrspace(1)* %out
3123   ret void
3124 }
3125
; The half arguments %dsdh, %dsdv and %s are each widened with fpext before the sample.d.1d call.
; Expected output: all three are passed as half and the type suffix goes from .f32.f32 to .f16.f16.
3126 define amdgpu_kernel void @image_sample_a16_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
3127 ; CHECK-LABEL: @image_sample_a16_d_1d(
3128 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3129 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3130 ; CHECK-NEXT:    ret void
3131 ;
3132   %dsdh32 = fpext half %dsdh to float
3133   %dsdv32 = fpext half %dsdv to float
3134   %s32 = fpext half %s to float
3135   %res = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3136   store <4 x float> %res, <4 x float> addrspace(1)* %out
3137   ret void
3138 }
3139
; Six half arguments (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t) are each widened with fpext before the sample.d.2d call.
; Expected output: all six are passed as half and the type suffix goes from .f32.f32 to .f16.f16.
3140 define amdgpu_kernel void @image_sample_a16_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
3141 ; CHECK-LABEL: @image_sample_a16_d_2d(
3142 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3143 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3144 ; CHECK-NEXT:    ret void
3145 ;
3146   %dsdh32 = fpext half %dsdh to float
3147   %dtdh32 = fpext half %dtdh to float
3148   %dsdv32 = fpext half %dsdv to float
3149   %dtdv32 = fpext half %dtdv to float
3150   %s32 = fpext half %s to float
3151   %t32 = fpext half %t to float
3152   %res = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3153   store <4 x float> %res, <4 x float> addrspace(1)* %out
3154   ret void
3155 }
3156
; Nine half arguments (%dsdh, %dtdh, %drdh, %dsdv, %dtdv, %drdv, %s, %t, %r) are each widened with fpext before the sample.d.3d call.
; Expected output: all nine are passed as half and the type suffix goes from .f32.f32 to .f16.f16.
3157 define amdgpu_kernel void @image_sample_a16_d_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
3158 ; CHECK-LABEL: @image_sample_a16_d_3d(
3159 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3160 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3161 ; CHECK-NEXT:    ret void
3162 ;
3163   %dsdh32 = fpext half %dsdh to float
3164   %dtdh32 = fpext half %dtdh to float
3165   %drdh32 = fpext half %drdh to float
3166   %dsdv32 = fpext half %dsdv to float
3167   %dtdv32 = fpext half %dtdv to float
3168   %drdv32 = fpext half %drdv to float
3169   %s32 = fpext half %s to float
3170   %t32 = fpext half %t to float
3171   %r32 = fpext half %r to float
3172   %res = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %drdh32, float %dsdv32, float %dtdv32, float %drdv32, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3173   store <4 x float> %res, <4 x float> addrspace(1)* %out
3174   ret void
3175 }
3176
; %zcompare is a float argument; %dsdh, %dsdv and %s are widened from half with fpext.
; Expected output: %zcompare stays float, the three half values are passed directly, and the suffix goes from .f32.f32 to .f16.f16.
3177 define amdgpu_kernel void @image_sample_a16_c_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
3178 ; CHECK-LABEL: @image_sample_a16_c_d_1d(
3179 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3180 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3181 ; CHECK-NEXT:    ret void
3182 ;
3183   %dsdh32 = fpext half %dsdh to float
3184   %dsdv32 = fpext half %dsdv to float
3185   %s32 = fpext half %s to float
3186   %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3187   store <4 x float> %res, <4 x float> addrspace(1)* %out
3188   ret void
3189 }
3190
; %zcompare is a float argument; %dsdh, %dtdh, %dsdv, %dtdv, %s and %t are widened from half with fpext.
; Expected output: %zcompare stays float, the six half values are passed directly, and the suffix goes from .f32.f32 to .f16.f16.
3191 define amdgpu_kernel void @image_sample_a16_c_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
3192 ; CHECK-LABEL: @image_sample_a16_c_d_2d(
3193 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3194 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3195 ; CHECK-NEXT:    ret void
3196 ;
3197   %dsdh32 = fpext half %dsdh to float
3198   %dtdh32 = fpext half %dtdh to float
3199   %dsdv32 = fpext half %dsdv to float
3200   %dtdv32 = fpext half %dtdv to float
3201   %s32 = fpext half %s to float
3202   %t32 = fpext half %t to float
3203   %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3204   store <4 x float> %res, <4 x float> addrspace(1)* %out
3205   ret void
3206 }
3207
; The half arguments %dsdh, %dsdv, %s and %clamp are each widened with fpext before the sample.d.cl.1d call.
; Expected output: all four are passed as half and the type suffix goes from .f32.f32 to .f16.f16.
3208 define amdgpu_kernel void @image_sample_a16_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
3209 ; CHECK-LABEL: @image_sample_a16_d_cl_1d(
3210 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3211 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3212 ; CHECK-NEXT:    ret void
3213 ;
3214   %dsdh32 = fpext half %dsdh to float
3215   %dsdv32 = fpext half %dsdv to float
3216   %s32 = fpext half %s to float
3217   %clamp32 = fpext half %clamp to float
3218   %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3219   store <4 x float> %res, <4 x float> addrspace(1)* %out
3220   ret void
3221 }
3222
; Seven half arguments (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t, %clamp) are each widened with fpext before the sample.d.cl.2d call.
; Expected output: all seven are passed as half and the type suffix goes from .f32.f32 to .f16.f16.
3223 define amdgpu_kernel void @image_sample_a16_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
3224 ; CHECK-LABEL: @image_sample_a16_d_cl_2d(
3225 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3226 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3227 ; CHECK-NEXT:    ret void
3228 ;
3229   %dsdh32 = fpext half %dsdh to float
3230   %dtdh32 = fpext half %dtdh to float
3231   %dsdv32 = fpext half %dsdv to float
3232   %dtdv32 = fpext half %dtdv to float
3233   %s32 = fpext half %s to float
3234   %t32 = fpext half %t to float
3235   %clamp32 = fpext half %clamp to float
3236   %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3237   store <4 x float> %res, <4 x float> addrspace(1)* %out
3238   ret void
3239 }
3240
; %zcompare is a float argument; %dsdh, %dsdv, %s and %clamp are widened from half with fpext.
; Expected output: %zcompare stays float, the four half values are passed directly, and the suffix goes from .f32.f32 to .f16.f16.
3241 define amdgpu_kernel void @image_sample_a16_c_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
3242 ; CHECK-LABEL: @image_sample_a16_c_d_cl_1d(
3243 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3244 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3245 ; CHECK-NEXT:    ret void
3246 ;
3247   %dsdh32 = fpext half %dsdh to float
3248   %dsdv32 = fpext half %dsdv to float
3249   %s32 = fpext half %s to float
3250   %clamp32 = fpext half %clamp to float
3251   %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3252   store <4 x float> %res, <4 x float> addrspace(1)* %out
3253   ret void
3254 }
3255
; %zcompare is a float argument; %dsdh, %dtdh, %dsdv, %dtdv, %s, %t and %clamp are widened from half with fpext.
; Expected output: %zcompare stays float, the seven half values are passed directly, and the suffix goes from .f32.f32 to .f16.f16.
3256 define amdgpu_kernel void @image_sample_a16_c_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
3257 ; CHECK-LABEL: @image_sample_a16_c_d_cl_2d(
3258 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3259 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3260 ; CHECK-NEXT:    ret void
3261 ;
3262   %dsdh32 = fpext half %dsdh to float
3263   %dtdh32 = fpext half %dtdh to float
3264   %dsdv32 = fpext half %dsdv to float
3265   %dtdv32 = fpext half %dtdv to float
3266   %s32 = fpext half %s to float
3267   %t32 = fpext half %t to float
3268   %clamp32 = fpext half %clamp to float
3269   %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3270   store <4 x float> %res, <4 x float> addrspace(1)* %out
3271   ret void
3272 }
3273
; The half arguments %dsdh, %dsdv and %s are each widened with fpext before the sample.cd.1d call.
; Expected output: all three are passed as half and the type suffix goes from .f32.f32 to .f16.f16.
3274 define amdgpu_kernel void @image_sample_a16_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
3275 ; CHECK-LABEL: @image_sample_a16_cd_1d(
3276 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3277 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3278 ; CHECK-NEXT:    ret void
3279 ;
3280   %dsdh32 = fpext half %dsdh to float
3281   %dsdv32 = fpext half %dsdv to float
3282   %s32 = fpext half %s to float
3283   %res = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3284   store <4 x float> %res, <4 x float> addrspace(1)* %out
3285   ret void
3286 }
3287
; Six half arguments (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t) are each widened with fpext before the sample.cd.2d call.
; Expected output: all six are passed as half and the type suffix goes from .f32.f32 to .f16.f16.
3288 define amdgpu_kernel void @image_sample_a16_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
3289 ; CHECK-LABEL: @image_sample_a16_cd_2d(
3290 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3291 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3292 ; CHECK-NEXT:    ret void
3293 ;
3294   %dsdh32 = fpext half %dsdh to float
3295   %dtdh32 = fpext half %dtdh to float
3296   %dsdv32 = fpext half %dsdv to float
3297   %dtdv32 = fpext half %dtdv to float
3298   %s32 = fpext half %s to float
3299   %t32 = fpext half %t to float
3300   %res = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3301   store <4 x float> %res, <4 x float> addrspace(1)* %out
3302   ret void
3303 }
3304
; %zcompare is a float argument; %dsdh, %dsdv and %s are widened from half with fpext.
; Expected output: %zcompare stays float, the three half values are passed directly, and the suffix goes from .f32.f32 to .f16.f16.
3305 define amdgpu_kernel void @image_sample_a16_c_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
3306 ; CHECK-LABEL: @image_sample_a16_c_cd_1d(
3307 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3308 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3309 ; CHECK-NEXT:    ret void
3310 ;
3311   %dsdh32 = fpext half %dsdh to float
3312   %dsdv32 = fpext half %dsdv to float
3313   %s32 = fpext half %s to float
3314   %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3315   store <4 x float> %res, <4 x float> addrspace(1)* %out
3316   ret void
3317 }
3318
; %zcompare is a float argument; %dsdh, %dtdh, %dsdv, %dtdv, %s and %t are widened from half with fpext.
; Expected output: %zcompare stays float, the six half values are passed directly, and the suffix goes from .f32.f32 to .f16.f16.
3319 define amdgpu_kernel void @image_sample_a16_c_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
3320 ; CHECK-LABEL: @image_sample_a16_c_cd_2d(
3321 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3322 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3323 ; CHECK-NEXT:    ret void
3324 ;
3325   %dsdh32 = fpext half %dsdh to float
3326   %dtdh32 = fpext half %dtdh to float
3327   %dsdv32 = fpext half %dsdv to float
3328   %dtdv32 = fpext half %dtdv to float
3329   %s32 = fpext half %s to float
3330   %t32 = fpext half %t to float
3331   %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3332   store <4 x float> %res, <4 x float> addrspace(1)* %out
3333   ret void
3334 }
3335
; The half arguments %dsdh, %dsdv, %s and %clamp are each widened with fpext before the sample.cd.cl.1d call.
; Expected output: all four are passed as half and the type suffix goes from .f32.f32 to .f16.f16.
3336 define amdgpu_kernel void @image_sample_a16_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
3337 ; CHECK-LABEL: @image_sample_a16_cd_cl_1d(
3338 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3339 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3340 ; CHECK-NEXT:    ret void
3341 ;
3342   %dsdh32 = fpext half %dsdh to float
3343   %dsdv32 = fpext half %dsdv to float
3344   %s32 = fpext half %s to float
3345   %clamp32 = fpext half %clamp to float
3346   %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3347   store <4 x float> %res, <4 x float> addrspace(1)* %out
3348   ret void
3349 }
3350
; Seven half arguments (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t, %clamp) are each widened with fpext before the sample.cd.cl.2d call.
; Expected output: all seven are passed as half and the type suffix goes from .f32.f32 to .f16.f16.
3351 define amdgpu_kernel void @image_sample_a16_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
3352 ; CHECK-LABEL: @image_sample_a16_cd_cl_2d(
3353 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3354 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3355 ; CHECK-NEXT:    ret void
3356 ;
3357   %dsdh32 = fpext half %dsdh to float
3358   %dtdh32 = fpext half %dtdh to float
3359   %dsdv32 = fpext half %dsdv to float
3360   %dtdv32 = fpext half %dtdv to float
3361   %s32 = fpext half %s to float
3362   %t32 = fpext half %t to float
3363   %clamp32 = fpext half %clamp to float
3364   %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3365   store <4 x float> %res, <4 x float> addrspace(1)* %out
3366   ret void
3367 }
3368
; %zcompare is a float argument; %dsdh, %dsdv, %s and %clamp are widened from half with fpext.
; Expected output: %zcompare stays float, the four half values are passed directly, and the suffix goes from .f32.f32 to .f16.f16.
3369 define amdgpu_kernel void @image_sample_a16_c_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
3370 ; CHECK-LABEL: @image_sample_a16_c_cd_cl_1d(
3371 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3372 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3373 ; CHECK-NEXT:    ret void
3374 ;
3375   %dsdh32 = fpext half %dsdh to float
3376   %dsdv32 = fpext half %dsdv to float
3377   %s32 = fpext half %s to float
3378   %clamp32 = fpext half %clamp to float
3379   %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3380   store <4 x float> %res, <4 x float> addrspace(1)* %out
3381   ret void
3382 }
3383
; %zcompare is a float argument; %dsdh, %dtdh, %dsdv, %dtdv, %s, %t and %clamp are widened from half with fpext.
; Expected output: %zcompare stays float, the seven half values are passed directly, and the suffix goes from .f32.f32 to .f16.f16.
3384 define amdgpu_kernel void @image_sample_a16_c_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
3385 ; CHECK-LABEL: @image_sample_a16_c_cd_cl_2d(
3386 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3387 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3388 ; CHECK-NEXT:    ret void
3389 ;
3390   %dsdh32 = fpext half %dsdh to float
3391   %dtdh32 = fpext half %dtdh to float
3392   %dsdv32 = fpext half %dsdv to float
3393   %dtdv32 = fpext half %dtdv to float
3394   %s32 = fpext half %s to float
3395   %t32 = fpext half %t to float
3396   %clamp32 = fpext half %clamp to float
3397   %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3398   store <4 x float> %res, <4 x float> addrspace(1)* %out
3399   ret void
3400 }
3401
; sample.l.1d, a16 case: the coordinate and the LOD are both fpext'd from half.
; The CHECK lines expect one call to the .f16 overload taking the half values directly.
3402 define amdgpu_kernel void @image_sample_a16_l_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
3403 ; CHECK-LABEL: @image_sample_a16_l_1d(
3404 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3405 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3406 ; CHECK-NEXT:    ret void
3407 ;
3408   %s32 = fpext half %s to float
3409   %lod32 = fpext half %lod to float
3410   %res = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3411   store <4 x float> %res, <4 x float> addrspace(1)* %out
3412   ret void
3413 }
3414
; sample.l.2d, a16 case: both coordinates and the LOD are fpext'd from half.
; The CHECK lines expect one call to the .f16 overload taking the half values directly.
3415 define amdgpu_kernel void @image_sample_a16_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
3416 ; CHECK-LABEL: @image_sample_a16_l_2d(
3417 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3418 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3419 ; CHECK-NEXT:    ret void
3420 ;
3421   %s32 = fpext half %s to float
3422   %t32 = fpext half %t to float
3423   %lod32 = fpext half %lod to float
3424   %res = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s32, float %t32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3425   store <4 x float> %res, <4 x float> addrspace(1)* %out
3426   ret void
3427 }
3428
; sample.c.l.1d, a16 case: the coordinate and the LOD are fpext'd from half; %zcompare is a plain float.
; The CHECK lines expect one call to the .f16 overload, with %zcompare still passed as float.
3429 define amdgpu_kernel void @image_sample_a16_c_l_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
3430 ; CHECK-LABEL: @image_sample_a16_c_l_1d(
3431 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3432 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3433 ; CHECK-NEXT:    ret void
3434 ;
3435   %s32 = fpext half %s to float
3436   %lod32 = fpext half %lod to float
3437   %res = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3438   store <4 x float> %res, <4 x float> addrspace(1)* %out
3439   ret void
3440 }
3441
; sample.c.l.2d, a16 case: both coordinates and the LOD are fpext'd from half; %zcompare is a plain float.
; The CHECK lines expect one call to the .f16 overload, with %zcompare still passed as float.
3442 define amdgpu_kernel void @image_sample_a16_c_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
3443 ; CHECK-LABEL: @image_sample_a16_c_l_2d(
3444 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3445 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3446 ; CHECK-NEXT:    ret void
3447 ;
3448   %s32 = fpext half %s to float
3449   %t32 = fpext half %t to float
3450   %lod32 = fpext half %lod to float
3451   %res = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3452   store <4 x float> %res, <4 x float> addrspace(1)* %out
3453   ret void
3454 }
3455
; sample.lz.1d, a16 case: the single coordinate is fpext'd from half.
; The CHECK lines expect one call to the .f16 overload taking the half value directly.
3456 define amdgpu_kernel void @image_sample_a16_lz_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
3457 ; CHECK-LABEL: @image_sample_a16_lz_1d(
3458 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3459 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3460 ; CHECK-NEXT:    ret void
3461 ;
3462   %s32 = fpext half %s to float
3463   %res = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3464   store <4 x float> %res, <4 x float> addrspace(1)* %out
3465   ret void
3466 }
3467
; sample.lz.2d, a16 case: both coordinates are fpext'd from half.
; The CHECK lines expect one call to the .f16 overload taking the half values directly.
3468 define amdgpu_kernel void @image_sample_a16_lz_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
3469 ; CHECK-LABEL: @image_sample_a16_lz_2d(
3470 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3471 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3472 ; CHECK-NEXT:    ret void
3473 ;
3474   %s32 = fpext half %s to float
3475   %t32 = fpext half %t to float
3476   %res = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3477   store <4 x float> %res, <4 x float> addrspace(1)* %out
3478   ret void
3479 }
3480
; sample.c.lz.1d, a16 case: the coordinate is fpext'd from half; %zcompare is a plain float.
; The CHECK lines expect one call to the .f16 overload, with %zcompare still passed as float.
3481 define amdgpu_kernel void @image_sample_a16_c_lz_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
3482 ; CHECK-LABEL: @image_sample_a16_c_lz_1d(
3483 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3484 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3485 ; CHECK-NEXT:    ret void
3486 ;
3487   %s32 = fpext half %s to float
3488   %res = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3489   store <4 x float> %res, <4 x float> addrspace(1)* %out
3490   ret void
3491 }
3492
; sample.c.lz.2d, a16 case: both coordinates are fpext'd from half; %zcompare is a plain float.
; The CHECK lines expect one call to the .f16 overload, with %zcompare still passed as float.
3493 define amdgpu_kernel void @image_sample_a16_c_lz_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
3494 ; CHECK-LABEL: @image_sample_a16_c_lz_2d(
3495 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3496 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3497 ; CHECK-NEXT:    ret void
3498 ;
3499   %s32 = fpext half %s to float
3500   %t32 = fpext half %t to float
3501   %res = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3502   store <4 x float> %res, <4 x float> addrspace(1)* %out
3503   ret void
3504 }
3505
; sample.c.d.o.2darray, a16 case with a scalar float result (first i32 operand is 4): derivatives, coordinates and slice are fpext'd from half.
; %offset (i32) and %zcompare (float) are passed through; the CHECK lines expect the .f32.f16.f16 overload.
3506 define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V1(float addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
3507 ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_V1(
3508 ; CHECK-NEXT:    [[TMP1:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3509 ; CHECK-NEXT:    store float [[TMP1]], float addrspace(1)* [[OUT:%.*]], align 4
3510 ; CHECK-NEXT:    ret void
3511 ;
3512   %dsdh32 = fpext half %dsdh to float
3513   %dtdh32 = fpext half %dtdh to float
3514   %dsdv32 = fpext half %dsdv to float
3515   %dtdv32 = fpext half %dtdv to float
3516   %s32 = fpext half %s to float
3517   %t32 = fpext half %t to float
3518   %slice32 = fpext half %slice to float
3519   %res = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3520   store float %res, float addrspace(1)* %out
3521   ret void
3522 }
3523
; sample.c.d.o.2darray, a16 case with a <2 x float> result (first i32 operand is 6): derivatives, coordinates and slice are fpext'd from half.
; %offset (i32) and %zcompare (float) are passed through; the CHECK lines expect the .v2f32.f16.f16 overload.
3524 define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V2(<2 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
3525 ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_V2(
3526 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f16(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3527 ; CHECK-NEXT:    store <2 x float> [[TMP1]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8
3528 ; CHECK-NEXT:    ret void
3529 ;
3530   %dsdh32 = fpext half %dsdh to float
3531   %dtdh32 = fpext half %dtdh to float
3532   %dsdv32 = fpext half %dsdv to float
3533   %dtdv32 = fpext half %dtdv to float
3534   %s32 = fpext half %s to float
3535   %t32 = fpext half %t to float
3536   %slice32 = fpext half %slice to float
3537   %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3538   store <2 x float> %res, <2 x float> addrspace(1)* %out
3539   ret void
3540 }
3541
3542 ; --------------------------------------------------------------------
3543 ; llvm.amdgcn.image.sample g16
3544 ; --------------------------------------------------------------------
3545
; sample.d.1d, g16 case: only the two derivatives are fpext'd from half; the coordinate %s is already float.
; The CHECK lines expect the .f16.f32 overload (half derivatives, float coordinate).
3546 define amdgpu_kernel void @image_sample_g16_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
3547 ; CHECK-LABEL: @image_sample_g16_d_1d(
3548 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3549 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3550 ; CHECK-NEXT:    ret void
3551 ;
3552   %dsdh32 = fpext half %dsdh to float
3553   %dsdv32 = fpext half %dsdv to float
3554   %res = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3555   store <4 x float> %res, <4 x float> addrspace(1)* %out
3556   ret void
3557 }
3558
; sample.d.2d, g16 case: only the four derivatives are fpext'd from half; coordinates %s and %t are already float.
; The CHECK lines expect the .f16.f32 overload (half derivatives, float coordinates).
3559 define amdgpu_kernel void @image_sample_g16_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
3560 ; CHECK-LABEL: @image_sample_g16_d_2d(
3561 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3562 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3563 ; CHECK-NEXT:    ret void
3564 ;
3565   %dsdh32 = fpext half %dsdh to float
3566   %dtdh32 = fpext half %dtdh to float
3567   %dsdv32 = fpext half %dsdv to float
3568   %dtdv32 = fpext half %dtdv to float
3569   %res = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3570   store <4 x float> %res, <4 x float> addrspace(1)* %out
3571   ret void
3572 }
3573
; sample.d.3d, g16 case: only the six derivatives are fpext'd from half; coordinates %s, %t and %r are already float.
; The CHECK lines expect the .f16.f32 overload (half derivatives, float coordinates).
3574 define amdgpu_kernel void @image_sample_g16_d_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
3575 ; CHECK-LABEL: @image_sample_g16_d_3d(
3576 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3577 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3578 ; CHECK-NEXT:    ret void
3579 ;
3580   %dsdh32 = fpext half %dsdh to float
3581   %dtdh32 = fpext half %dtdh to float
3582   %drdh32 = fpext half %drdh to float
3583   %dsdv32 = fpext half %dsdv to float
3584   %dtdv32 = fpext half %dtdv to float
3585   %drdv32 = fpext half %drdv to float
3586   %res = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %drdh32, float %dsdv32, float %dtdv32, float %drdv32, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3587   store <4 x float> %res, <4 x float> addrspace(1)* %out
3588   ret void
3589 }
3590
; sample.c.d.1d, g16 case: only the two derivatives are fpext'd from half; %zcompare and the coordinate %s are already float.
; The CHECK lines expect the .f16.f32 overload (half derivatives, float coordinate).
3591 define amdgpu_kernel void @image_sample_g16_c_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
3592 ; CHECK-LABEL: @image_sample_g16_c_d_1d(
3593 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3594 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3595 ; CHECK-NEXT:    ret void
3596 ;
3597   %dsdh32 = fpext half %dsdh to float
3598   %dsdv32 = fpext half %dsdv to float
3599   %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3600   store <4 x float> %res, <4 x float> addrspace(1)* %out
3601   ret void
3602 }
3603
; sample.c.d.2d, g16 case: only the four derivatives are fpext'd from half; %zcompare and coordinates %s, %t are already float.
; The CHECK lines expect the .f16.f32 overload (half derivatives, float coordinates).
3604 define amdgpu_kernel void @image_sample_g16_c_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
3605 ; CHECK-LABEL: @image_sample_g16_c_d_2d(
3606 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3607 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3608 ; CHECK-NEXT:    ret void
3609 ;
3610   %dsdh32 = fpext half %dsdh to float
3611   %dtdh32 = fpext half %dtdh to float
3612   %dsdv32 = fpext half %dsdv to float
3613   %dtdv32 = fpext half %dtdv to float
3614   %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3615   store <4 x float> %res, <4 x float> addrspace(1)* %out
3616   ret void
3617 }
3618
; sample.d.cl.1d, g16 case: only the two derivatives are fpext'd from half; the coordinate %s and %clamp are already float.
; The CHECK lines expect the .f16.f32 overload (half derivatives, float coordinate and clamp).
3619 define amdgpu_kernel void @image_sample_g16_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
3620 ; CHECK-LABEL: @image_sample_g16_d_cl_1d(
3621 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3622 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3623 ; CHECK-NEXT:    ret void
3624 ;
3625   %dsdh32 = fpext half %dsdh to float
3626   %dsdv32 = fpext half %dsdv to float
3627   %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3628   store <4 x float> %res, <4 x float> addrspace(1)* %out
3629   ret void
3630 }
3631
; sample.d.cl.2d, g16 case: only the four derivatives are fpext'd from half; coordinates %s, %t and %clamp are already float.
; The CHECK lines expect the .f16.f32 overload (half derivatives, float coordinates and clamp).
3632 define amdgpu_kernel void @image_sample_g16_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
3633 ; CHECK-LABEL: @image_sample_g16_d_cl_2d(
3634 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3635 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3636 ; CHECK-NEXT:    ret void
3637 ;
3638   %dsdh32 = fpext half %dsdh to float
3639   %dtdh32 = fpext half %dtdh to float
3640   %dsdv32 = fpext half %dsdv to float
3641   %dtdv32 = fpext half %dtdv to float
3642   %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3643   store <4 x float> %res, <4 x float> addrspace(1)* %out
3644   ret void
3645 }
3646
; sample.c.d.cl.1d, g16 case: only the two derivatives are fpext'd from half; %zcompare, %s and %clamp are already float.
; The CHECK lines expect the .f16.f32 overload (half derivatives, float coordinate and clamp).
3647 define amdgpu_kernel void @image_sample_g16_c_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
3648 ; CHECK-LABEL: @image_sample_g16_c_d_cl_1d(
3649 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3650 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3651 ; CHECK-NEXT:    ret void
3652 ;
3653   %dsdh32 = fpext half %dsdh to float
3654   %dsdv32 = fpext half %dsdv to float
3655   %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3656   store <4 x float> %res, <4 x float> addrspace(1)* %out
3657   ret void
3658 }
3659
; sample.c.d.cl.2d, g16 case: only the four derivatives are fpext'd from half; %zcompare, %s, %t and %clamp are already float.
; The CHECK lines expect the .f16.f32 overload (half derivatives, float coordinates and clamp).
3660 define amdgpu_kernel void @image_sample_g16_c_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
3661 ; CHECK-LABEL: @image_sample_g16_c_d_cl_2d(
3662 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3663 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3664 ; CHECK-NEXT:    ret void
3665 ;
3666   %dsdh32 = fpext half %dsdh to float
3667   %dtdh32 = fpext half %dtdh to float
3668   %dsdv32 = fpext half %dsdv to float
3669   %dtdv32 = fpext half %dtdv to float
3670   %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3671   store <4 x float> %res, <4 x float> addrspace(1)* %out
3672   ret void
3673 }
3674
; sample.cd.1d, g16 case: only the two derivatives are fpext'd from half; the coordinate %s is already float.
; The CHECK lines expect the .f16.f32 overload (half derivatives, float coordinate).
3675 define amdgpu_kernel void @image_sample_g16_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
3676 ; CHECK-LABEL: @image_sample_g16_cd_1d(
3677 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3678 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3679 ; CHECK-NEXT:    ret void
3680 ;
3681   %dsdh32 = fpext half %dsdh to float
3682   %dsdv32 = fpext half %dsdv to float
3683   %res = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3684   store <4 x float> %res, <4 x float> addrspace(1)* %out
3685   ret void
3686 }
3687
; sample.cd.2d, g16 case: only the four derivatives are fpext'd from half; coordinates %s and %t are already float.
; The CHECK lines expect the .f16.f32 overload (half derivatives, float coordinates).
3688 define amdgpu_kernel void @image_sample_g16_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
3689 ; CHECK-LABEL: @image_sample_g16_cd_2d(
3690 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3691 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3692 ; CHECK-NEXT:    ret void
3693 ;
3694   %dsdh32 = fpext half %dsdh to float
3695   %dtdh32 = fpext half %dtdh to float
3696   %dsdv32 = fpext half %dsdv to float
3697   %dtdv32 = fpext half %dtdv to float
3698   %res = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3699   store <4 x float> %res, <4 x float> addrspace(1)* %out
3700   ret void
3701 }
3702
; sample.c.cd.1d, g16 case: only the two derivatives are fpext'd from half; %zcompare and the coordinate %s are already float.
; The CHECK lines expect the .f16.f32 overload (half derivatives, float coordinate).
3703 define amdgpu_kernel void @image_sample_g16_c_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
3704 ; CHECK-LABEL: @image_sample_g16_c_cd_1d(
3705 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3706 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3707 ; CHECK-NEXT:    ret void
3708 ;
3709   %dsdh32 = fpext half %dsdh to float
3710   %dsdv32 = fpext half %dsdv to float
3711   %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3712   store <4 x float> %res, <4 x float> addrspace(1)* %out
3713   ret void
3714 }
3715
; sample.c.cd.2d, g16 case: only the four derivatives are fpext'd from half; %zcompare and coordinates %s, %t are already float.
; The CHECK lines expect the .f16.f32 overload (half derivatives, float coordinates).
3716 define amdgpu_kernel void @image_sample_g16_c_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
3717 ; CHECK-LABEL: @image_sample_g16_c_cd_2d(
3718 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3719 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3720 ; CHECK-NEXT:    ret void
3721 ;
3722   %dsdh32 = fpext half %dsdh to float
3723   %dtdh32 = fpext half %dtdh to float
3724   %dsdv32 = fpext half %dsdv to float
3725   %dtdv32 = fpext half %dtdv to float
3726   %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3727   store <4 x float> %res, <4 x float> addrspace(1)* %out
3728   ret void
3729 }
3730
; sample.cd.cl.1d, g16 case: only the two derivatives are fpext'd from half; the coordinate %s and %clamp are already float.
; The CHECK lines expect the .f16.f32 overload (half derivatives, float coordinate and clamp).
3731 define amdgpu_kernel void @image_sample_g16_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
3732 ; CHECK-LABEL: @image_sample_g16_cd_cl_1d(
3733 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3734 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3735 ; CHECK-NEXT:    ret void
3736 ;
3737   %dsdh32 = fpext half %dsdh to float
3738   %dsdv32 = fpext half %dsdv to float
3739   %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3740   store <4 x float> %res, <4 x float> addrspace(1)* %out
3741   ret void
3742 }
3743
; sample.cd.cl.2d, g16 case: only the four derivatives are fpext'd from half; coordinates %s, %t and %clamp are already float.
; The CHECK lines expect the .f16.f32 overload (half derivatives, float coordinates and clamp).
3744 define amdgpu_kernel void @image_sample_g16_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
3745 ; CHECK-LABEL: @image_sample_g16_cd_cl_2d(
3746 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3747 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3748 ; CHECK-NEXT:    ret void
3749 ;
3750   %dsdh32 = fpext half %dsdh to float
3751   %dtdh32 = fpext half %dtdh to float
3752   %dsdv32 = fpext half %dsdv to float
3753   %dtdv32 = fpext half %dtdv to float
3754   %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3755   store <4 x float> %res, <4 x float> addrspace(1)* %out
3756   ret void
3757 }
3758
; sample.c.cd.cl.1d, g16 case: only the two derivatives are fpext'd from half; %zcompare, %s and %clamp are already float.
; The CHECK lines expect the .f16.f32 overload (half derivatives, float coordinate and clamp).
3759 define amdgpu_kernel void @image_sample_g16_c_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
3760 ; CHECK-LABEL: @image_sample_g16_c_cd_cl_1d(
3761 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3762 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3763 ; CHECK-NEXT:    ret void
3764 ;
3765   %dsdh32 = fpext half %dsdh to float
3766   %dsdv32 = fpext half %dsdv to float
3767   %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3768   store <4 x float> %res, <4 x float> addrspace(1)* %out
3769   ret void
3770 }
3771
; sample.c.cd.cl.2d, g16 case: only the four derivatives are fpext'd from half; %zcompare, %s, %t and %clamp are already float.
; The CHECK lines expect the .f16.f32 overload (half derivatives, float coordinates and clamp).
3772 define amdgpu_kernel void @image_sample_g16_c_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
3773 ; CHECK-LABEL: @image_sample_g16_c_cd_cl_2d(
3774 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3775 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3776 ; CHECK-NEXT:    ret void
3777 ;
3778   %dsdh32 = fpext half %dsdh to float
3779   %dtdh32 = fpext half %dtdh to float
3780   %dsdv32 = fpext half %dsdv to float
3781   %dtdv32 = fpext half %dtdv to float
3782   %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3783   store <4 x float> %res, <4 x float> addrspace(1)* %out
3784   ret void
3785 }
3786
; sample.c.d.o.2darray, g16 case with a scalar float result (first i32 operand is 4): only the four derivatives are fpext'd from half.
; %offset, %zcompare, %s, %t and %slice are passed through unchanged; the CHECK lines expect the .f32.f16.f32 overload.
3787 define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V1(float addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
3788 ; CHECK-LABEL: @image_sample_g16_c_d_o_2darray_V1(
3789 ; CHECK-NEXT:    [[TMP1:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3790 ; CHECK-NEXT:    store float [[TMP1]], float addrspace(1)* [[OUT:%.*]], align 4
3791 ; CHECK-NEXT:    ret void
3792 ;
3793   %dsdh32 = fpext half %dsdh to float
3794   %dtdh32 = fpext half %dtdh to float
3795   %dsdv32 = fpext half %dsdv to float
3796   %dtdv32 = fpext half %dtdv to float
3797   %res = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3798   store float %res, float addrspace(1)* %out
3799   ret void
3800 }
3801
; sample.c.d.o.2darray, g16 case with a <2 x float> result (first i32 operand is 6): only the four derivatives are fpext'd from half.
; %offset, %zcompare, %s, %t and %slice are passed through unchanged; the CHECK lines expect the .v2f32.f16.f32 overload.
3802 define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V2(<2 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
3803 ; CHECK-LABEL: @image_sample_g16_c_d_o_2darray_V2(
3804 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3805 ; CHECK-NEXT:    store <2 x float> [[TMP1]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8
3806 ; CHECK-NEXT:    ret void
3807 ;
3808   %dsdh32 = fpext half %dsdh to float
3809   %dtdh32 = fpext half %dtdh to float
3810   %dsdv32 = fpext half %dsdv to float
3811   %dtdv32 = fpext half %dtdv to float
3812   %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3813   store <2 x float> %res, <2 x float> addrspace(1)* %out
3814   ret void
3815 }
3816
3817 ; --------------------------------------------------------------------
3818 ; llvm.amdgcn.image.sample a16 preserve fast-math flags
3819 ; --------------------------------------------------------------------
3820
3821 define amdgpu_kernel void @image_sample_a16_1d_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
3822 ; CHECK-LABEL: @image_sample_a16_1d_nnan(
3823 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3824 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3825 ; CHECK-NEXT:    ret void
3826 ;
     ; 1d sample whose single coordinate %s is a half widened by fpext.
     ; The input call carries the `nnan` flag; the CHECK lines expect the
     ; call rewritten to the .v4f32.f16 overload with the half coordinate
     ; passed directly and `nnan` still present on the new call.
3827   %s32 = fpext half %s to float
3828   %res = call nnan <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3829   store <4 x float> %res, <4 x float> addrspace(1)* %out
3830   ret void
3831 }
3832
3833 define amdgpu_kernel void @image_sample_a16_1d_nnan_ninf_nsz(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
3834 ; CHECK-LABEL: @image_sample_a16_1d_nnan_ninf_nsz(
3835 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3836 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3837 ; CHECK-NEXT:    ret void
3838 ;
     ; 1d sample whose single coordinate %s is a half widened by fpext.
     ; The input call carries three flags (`nnan ninf nsz`); the CHECK lines
     ; expect the call rewritten to the .v4f32.f16 overload with all three
     ; flags still present on the new call.
3839   %s32 = fpext half %s to float
3840   %res = call nnan ninf nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3841   store <4 x float> %res, <4 x float> addrspace(1)* %out
3842   ret void
3843 }
3844
3845 define amdgpu_kernel void @image_sample_a16_1d_fast(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
3846 ; CHECK-LABEL: @image_sample_a16_1d_fast(
3847 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3848 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3849 ; CHECK-NEXT:    ret void
3850 ;
     ; 1d sample whose single coordinate %s is a half widened by fpext.
     ; The input call is marked `fast`; the CHECK lines expect the call
     ; rewritten to the .v4f32.f16 overload with `fast` still present on
     ; the new call.
3851   %s32 = fpext half %s to float
3852   %res = call fast <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3853   store <4 x float> %res, <4 x float> addrspace(1)* %out
3854   ret void
3855 }
3856
3857 define amdgpu_kernel void @image_sample_a16_2d_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
3858 ; CHECK-LABEL: @image_sample_a16_2d_nnan(
3859 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3860 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3861 ; CHECK-NEXT:    ret void
3862 ;
     ; 2d sample: both coordinates (%s, %t) are halves widened by fpext.
     ; The CHECK lines expect the call rewritten to the .v4f32.f16 overload
     ; taking both halves directly, with the `nnan` flag kept on the new call.
3863   %s32 = fpext half %s to float
3864   %t32 = fpext half %t to float
3865   %res = call nnan <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3866   store <4 x float> %res, <4 x float> addrspace(1)* %out
3867   ret void
3868 }
3869
3870 define amdgpu_kernel void @image_sample_a16_3d_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
3871 ; CHECK-LABEL: @image_sample_a16_3d_nnan(
3872 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3873 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3874 ; CHECK-NEXT:    ret void
3875 ;
     ; 3d sample: all three coordinates (%s, %t, %r) are halves widened by
     ; fpext. The CHECK lines expect the call rewritten to the .v4f32.f16
     ; overload taking the halves directly, with the `nnan` flag kept on the
     ; new call.
3876   %s32 = fpext half %s to float
3877   %t32 = fpext half %t to float
3878   %r32 = fpext half %r to float
3879   %res = call nnan <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3880   store <4 x float> %res, <4 x float> addrspace(1)* %out
3881   ret void
3882 }
3883
3884 define amdgpu_kernel void @image_sample_a16_cube_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
3885 ;
3886 ; CHECK-LABEL: @image_sample_a16_cube_nnan(
3887 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3888 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3889 ; CHECK-NEXT:    ret void
3890 ;
     ; Cube sample: %s, %t and %face are all halves widened by fpext.
     ; The CHECK lines expect the call rewritten to the .v4f32.f16 overload
     ; taking the three halves directly, with the `nnan` flag kept on the
     ; new call.
3891   %s32 = fpext half %s to float
3892   %t32 = fpext half %t to float
3893   %face32 = fpext half %face to float
3894   %res = call nnan <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3895   store <4 x float> %res, <4 x float> addrspace(1)* %out
3896   ret void
3897 }
3898
3899 define amdgpu_kernel void @image_sample_a16_1darray_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
3900 ; CHECK-LABEL: @image_sample_a16_1darray_nnan(
3901 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3902 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3903 ; CHECK-NEXT:    ret void
3904 ;
     ; 1darray sample: %s and %slice are both halves widened by fpext.
     ; The CHECK lines expect the call rewritten to the .v4f32.f16 overload
     ; taking both halves directly, with the `nnan` flag kept on the new call.
3905   %s32 = fpext half %s to float
3906   %slice32 = fpext half %slice to float
3907   %res = call nnan <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3908   store <4 x float> %res, <4 x float> addrspace(1)* %out
3909   ret void
3910 }
3911
3912 define amdgpu_kernel void @image_sample_a16_2darray_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
3913 ; CHECK-LABEL: @image_sample_a16_2darray_nnan(
3914 ; CHECK-NEXT:    [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3915 ; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3916 ; CHECK-NEXT:    ret void
3917 ;
     ; 2darray sample: %s, %t and %slice are all halves widened by fpext.
     ; The CHECK lines expect the call rewritten to the .v4f32.f16 overload
     ; taking the three halves directly, with the `nnan` flag kept on the
     ; new call.
3918   %s32 = fpext half %s to float
3919   %t32 = fpext half %t to float
3920   %slice32 = fpext half %slice to float
3921   %res = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3922   store <4 x float> %res, <4 x float> addrspace(1)* %out
3923   ret void
3924 }