1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -instcombine -S < %s | FileCheck %s
4 ; --------------------------------------------------------------------
5 ; llvm.amdgcn.rcp
6 ; --------------------------------------------------------------------
8 declare float @llvm.amdgcn.rcp.f32(float) nounwind readnone
9 declare double @llvm.amdgcn.rcp.f64(double) nounwind readnone
; rcp of an undef operand: the expected result is the NaN constant
; 0x7FF8000000000000 rather than undef.
11 define float @test_constant_fold_rcp_f32_undef() nounwind {
12 ; CHECK-LABEL: @test_constant_fold_rcp_f32_undef(
13 ; CHECK-NEXT: ret float 0x7FF8000000000000
15 %val = call float @llvm.amdgcn.rcp.f32(float undef) nounwind readnone
19 define float @test_constant_fold_rcp_f32_1() nounwind {
20 ; CHECK-LABEL: @test_constant_fold_rcp_f32_1(
21 ; CHECK-NEXT: ret float 1.000000e+00
23 %val = call float @llvm.amdgcn.rcp.f32(float 1.0) nounwind readnone
27 define double @test_constant_fold_rcp_f64_1() nounwind {
28 ; CHECK-LABEL: @test_constant_fold_rcp_f64_1(
29 ; CHECK-NEXT: ret double 1.000000e+00
31 %val = call double @llvm.amdgcn.rcp.f64(double 1.0) nounwind readnone
35 define float @test_constant_fold_rcp_f32_half() nounwind {
36 ; CHECK-LABEL: @test_constant_fold_rcp_f32_half(
37 ; CHECK-NEXT: ret float 2.000000e+00
39 %val = call float @llvm.amdgcn.rcp.f32(float 0.5) nounwind readnone
43 define double @test_constant_fold_rcp_f64_half() nounwind {
44 ; CHECK-LABEL: @test_constant_fold_rcp_f64_half(
45 ; CHECK-NEXT: ret double 2.000000e+00
47 %val = call double @llvm.amdgcn.rcp.f64(double 0.5) nounwind readnone
51 define float @test_constant_fold_rcp_f32_43() nounwind {
52 ; CHECK-LABEL: @test_constant_fold_rcp_f32_43(
53 ; CHECK-NEXT: ret float 0x3F97D05F40000000
55 %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) nounwind readnone
59 define double @test_constant_fold_rcp_f64_43() nounwind {
60 ; CHECK-LABEL: @test_constant_fold_rcp_f64_43(
61 ; CHECK-NEXT: ret double 0x3F97D05F417D05F4
63 %val = call double @llvm.amdgcn.rcp.f64(double 4.300000e+01) nounwind readnone
; strictfp variant of @test_constant_fold_rcp_f32_43: both the function and the
; call carry strictfp, and the expected output keeps the rcp call on 43.0
; instead of replacing it with a constant.
67 define float @test_constant_fold_rcp_f32_43_strictfp() nounwind strictfp {
68 ; CHECK-LABEL: @test_constant_fold_rcp_f32_43_strictfp(
69 ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) [[ATTR11:#.*]]
70 ; CHECK-NEXT: ret float [[VAL]]
72 %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) strictfp nounwind readnone
76 ; --------------------------------------------------------------------
77 ; llvm.amdgcn.rsq
78 ; --------------------------------------------------------------------
80 declare float @llvm.amdgcn.rsq.f32(float) nounwind readnone
; rsq of an undef operand: the expected result is the NaN constant
; 0x7FF8000000000000 rather than undef.
82 define float @test_constant_fold_rsq_f32_undef() nounwind {
83 ; CHECK-LABEL: @test_constant_fold_rsq_f32_undef(
84 ; CHECK-NEXT: ret float 0x7FF8000000000000
86 %val = call float @llvm.amdgcn.rsq.f32(float undef) nounwind readnone
90 ; --------------------------------------------------------------------
91 ; llvm.amdgcn.frexp.mant
92 ; --------------------------------------------------------------------
94 declare float @llvm.amdgcn.frexp.mant.f32(float) nounwind readnone
95 declare double @llvm.amdgcn.frexp.mant.f64(double) nounwind readnone
; frexp.mant of an undef operand is expected to fold to undef. (The rcp and
; rsq undef cases in this file expect a NaN constant instead.)
98 define float @test_constant_fold_frexp_mant_f32_undef() nounwind {
99 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_undef(
100 ; CHECK-NEXT: ret float undef
102 %val = call float @llvm.amdgcn.frexp.mant.f32(float undef)
106 define double @test_constant_fold_frexp_mant_f64_undef() nounwind {
107 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_undef(
108 ; CHECK-NEXT: ret double undef
110 %val = call double @llvm.amdgcn.frexp.mant.f64(double undef)
114 define float @test_constant_fold_frexp_mant_f32_0() nounwind {
115 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_0(
116 ; CHECK-NEXT: ret float 0.000000e+00
118 %val = call float @llvm.amdgcn.frexp.mant.f32(float 0.0)
122 define double @test_constant_fold_frexp_mant_f64_0() nounwind {
123 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_0(
124 ; CHECK-NEXT: ret double 0.000000e+00
126 %val = call double @llvm.amdgcn.frexp.mant.f64(double 0.0)
130 define float @test_constant_fold_frexp_mant_f32_n0() nounwind {
131 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_n0(
132 ; CHECK-NEXT: ret float -0.000000e+00
134 %val = call float @llvm.amdgcn.frexp.mant.f32(float -0.0)
138 define double @test_constant_fold_frexp_mant_f64_n0() nounwind {
139 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_n0(
140 ; CHECK-NEXT: ret double -0.000000e+00
142 %val = call double @llvm.amdgcn.frexp.mant.f64(double -0.0)
146 define float @test_constant_fold_frexp_mant_f32_1() nounwind {
147 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_1(
148 ; CHECK-NEXT: ret float 5.000000e-01
150 %val = call float @llvm.amdgcn.frexp.mant.f32(float 1.0)
154 define double @test_constant_fold_frexp_mant_f64_1() nounwind {
155 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_1(
156 ; CHECK-NEXT: ret double 5.000000e-01
158 %val = call double @llvm.amdgcn.frexp.mant.f64(double 1.0)
162 define float @test_constant_fold_frexp_mant_f32_n1() nounwind {
163 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_n1(
164 ; CHECK-NEXT: ret float -5.000000e-01
166 %val = call float @llvm.amdgcn.frexp.mant.f32(float -1.0)
170 define double @test_constant_fold_frexp_mant_f64_n1() nounwind {
171 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_n1(
172 ; CHECK-NEXT: ret double -5.000000e-01
174 %val = call double @llvm.amdgcn.frexp.mant.f64(double -1.0)
178 define float @test_constant_fold_frexp_mant_f32_nan() nounwind {
179 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_nan(
180 ; CHECK-NEXT: ret float 0x7FF8000000000000
182 %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x7FF8000000000000)
186 define double @test_constant_fold_frexp_mant_f64_nan() nounwind {
187 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_nan(
188 ; CHECK-NEXT: ret double 0x7FF8000000000000
190 %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FF8000000000000)
194 define float @test_constant_fold_frexp_mant_f32_inf() nounwind {
195 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_inf(
196 ; CHECK-NEXT: ret float 0x7FF0000000000000
198 %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x7FF0000000000000)
202 define double @test_constant_fold_frexp_mant_f64_inf() nounwind {
203 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_inf(
204 ; CHECK-NEXT: ret double 0x7FF0000000000000
206 %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FF0000000000000)
210 define float @test_constant_fold_frexp_mant_f32_ninf() nounwind {
211 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_ninf(
212 ; CHECK-NEXT: ret float 0xFFF0000000000000
214 %val = call float @llvm.amdgcn.frexp.mant.f32(float 0xFFF0000000000000)
218 define double @test_constant_fold_frexp_mant_f64_ninf() nounwind {
219 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_ninf(
220 ; CHECK-NEXT: ret double 0xFFF0000000000000
222 %val = call double @llvm.amdgcn.frexp.mant.f64(double 0xFFF0000000000000)
226 define float @test_constant_fold_frexp_mant_f32_max_num() nounwind {
227 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_max_num(
228 ; CHECK-NEXT: ret float 0x3FEFFFFFE0000000
230 %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x47EFFFFFE0000000)
234 define double @test_constant_fold_frexp_mant_f64_max_num() nounwind {
235 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_max_num(
236 ; CHECK-NEXT: ret double 0x3FEFFFFFFFFFFFFF
238 %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FEFFFFFFFFFFFFF)
; The operand 0x36A0000000000000 is 2^-149 (biased exponent 0x36A = 874,
; 874 - 1023 = -149, zero fraction). The expected mantissa is 0.5; the
; matching exponent is covered by @test_constant_fold_frexp_exp_f32_min_num.
242 define float @test_constant_fold_frexp_mant_f32_min_num() nounwind {
243 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_min_num(
244 ; CHECK-NEXT: ret float 5.000000e-01
246 %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x36A0000000000000)
250 define double @test_constant_fold_frexp_mant_f64_min_num() nounwind {
251 ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_min_num(
252 ; CHECK-NEXT: ret double 5.000000e-01
254 %val = call double @llvm.amdgcn.frexp.mant.f64(double 4.940656e-324)
259 ; --------------------------------------------------------------------
260 ; llvm.amdgcn.frexp.exp
261 ; --------------------------------------------------------------------
263 declare i32 @llvm.amdgcn.frexp.exp.f32(float) nounwind readnone
264 declare i32 @llvm.amdgcn.frexp.exp.f64(double) nounwind readnone
266 define i32 @test_constant_fold_frexp_exp_f32_undef() nounwind {
267 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_undef(
268 ; CHECK-NEXT: ret i32 undef
270 %val = call i32 @llvm.amdgcn.frexp.exp.f32(float undef)
274 define i32 @test_constant_fold_frexp_exp_f64_undef() nounwind {
275 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_undef(
276 ; CHECK-NEXT: ret i32 undef
278 %val = call i32 @llvm.amdgcn.frexp.exp.f64(double undef)
282 define i32 @test_constant_fold_frexp_exp_f32_0() nounwind {
283 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_0(
284 ; CHECK-NEXT: ret i32 0
286 %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0.0)
290 define i32 @test_constant_fold_frexp_exp_f64_0() nounwind {
291 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_0(
292 ; CHECK-NEXT: ret i32 0
294 %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0.0)
298 define i32 @test_constant_fold_frexp_exp_f32_n0() nounwind {
299 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_n0(
300 ; CHECK-NEXT: ret i32 0
302 %val = call i32 @llvm.amdgcn.frexp.exp.f32(float -0.0)
306 define i32 @test_constant_fold_frexp_exp_f64_n0() nounwind {
307 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_n0(
308 ; CHECK-NEXT: ret i32 0
310 %val = call i32 @llvm.amdgcn.frexp.exp.f64(double -0.0)
; 1024.0 == 0.5 * 2^11, so the expected folded exponent is 11.
314 define i32 @test_constant_fold_frexp_exp_f32_1024() nounwind {
315 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_1024(
316 ; CHECK-NEXT: ret i32 11
318 %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 1024.0)
322 define i32 @test_constant_fold_frexp_exp_f64_1024() nounwind {
323 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_1024(
324 ; CHECK-NEXT: ret i32 11
326 %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 1024.0)
330 define i32 @test_constant_fold_frexp_exp_f32_n1024() nounwind {
331 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_n1024(
332 ; CHECK-NEXT: ret i32 11
334 %val = call i32 @llvm.amdgcn.frexp.exp.f32(float -1024.0)
338 define i32 @test_constant_fold_frexp_exp_f64_n1024() nounwind {
339 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_n1024(
340 ; CHECK-NEXT: ret i32 11
342 %val = call i32 @llvm.amdgcn.frexp.exp.f64(double -1024.0)
; 0.0009765625 == 2^-10 == 0.5 * 2^-9, so the expected folded exponent is -9.
346 define i32 @test_constant_fold_frexp_exp_f32_1_1024() nounwind {
347 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_1_1024(
348 ; CHECK-NEXT: ret i32 -9
350 %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0.0009765625)
354 define i32 @test_constant_fold_frexp_exp_f64_1_1024() nounwind {
355 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_1_1024(
356 ; CHECK-NEXT: ret i32 -9
358 %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0.0009765625)
; A NaN operand is expected to fold to exponent 0 (the infinity cases that
; follow expect 0 as well).
362 define i32 @test_constant_fold_frexp_exp_f32_nan() nounwind {
363 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_nan(
364 ; CHECK-NEXT: ret i32 0
366 %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x7FF8000000000000)
370 define i32 @test_constant_fold_frexp_exp_f64_nan() nounwind {
371 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_nan(
372 ; CHECK-NEXT: ret i32 0
374 %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FF8000000000000)
378 define i32 @test_constant_fold_frexp_exp_f32_inf() nounwind {
379 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_inf(
380 ; CHECK-NEXT: ret i32 0
382 %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x7FF0000000000000)
386 define i32 @test_constant_fold_frexp_exp_f64_inf() nounwind {
387 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_inf(
388 ; CHECK-NEXT: ret i32 0
390 %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FF0000000000000)
394 define i32 @test_constant_fold_frexp_exp_f32_ninf() nounwind {
395 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_ninf(
396 ; CHECK-NEXT: ret i32 0
398 %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0xFFF0000000000000)
402 define i32 @test_constant_fold_frexp_exp_f64_ninf() nounwind {
403 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_ninf(
404 ; CHECK-NEXT: ret i32 0
406 %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0xFFF0000000000000)
410 define i32 @test_constant_fold_frexp_exp_f32_max_num() nounwind {
411 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_max_num(
412 ; CHECK-NEXT: ret i32 128
414 %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x47EFFFFFE0000000)
418 define i32 @test_constant_fold_frexp_exp_f64_max_num() nounwind {
419 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_max_num(
420 ; CHECK-NEXT: ret i32 1024
422 %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FEFFFFFFFFFFFFF)
; Same operand as @test_constant_fold_frexp_mant_f32_min_num (2^-149). The
; expected exponent -148 pairs with that test's 0.5 mantissa:
; 0.5 * 2^-148 == 2^-149.
426 define i32 @test_constant_fold_frexp_exp_f32_min_num() nounwind {
427 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_min_num(
428 ; CHECK-NEXT: ret i32 -148
430 %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x36A0000000000000)
434 define i32 @test_constant_fold_frexp_exp_f64_min_num() nounwind {
435 ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_min_num(
436 ; CHECK-NEXT: ret i32 -1073
438 %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 4.940656e-324)
442 ; --------------------------------------------------------------------
443 ; llvm.amdgcn.class
444 ; --------------------------------------------------------------------
446 declare i1 @llvm.amdgcn.class.f32(float, i32) nounwind readnone
447 declare i1 @llvm.amdgcn.class.f64(double, i32) nounwind readnone
; Non-constant value with an undef mask: the expected fold is the constant
; false.
449 define i1 @test_class_undef_mask_f32(float %x) nounwind {
450 ; CHECK-LABEL: @test_class_undef_mask_f32(
451 ; CHECK-NEXT: ret i1 false
453 %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 undef)
; Mask 1025 = 1024 + 1 sets a bit above the full class mask (1023, see
; @test_class_full_mask_f32). The expected call keeps only mask 1, i.e. the
; 1024 bit is dropped and the call itself stays.
457 define i1 @test_class_over_max_mask_f32(float %x) nounwind {
458 ; CHECK-LABEL: @test_class_over_max_mask_f32(
459 ; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[X:%.*]], i32 1)
460 ; CHECK-NEXT: ret i1 [[VAL]]
462 %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1025)
466 define i1 @test_class_no_mask_f32(float %x) nounwind {
467 ; CHECK-LABEL: @test_class_no_mask_f32(
468 ; CHECK-NEXT: ret i1 false
470 %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 0)
474 define i1 @test_class_full_mask_f32(float %x) nounwind {
475 ; CHECK-LABEL: @test_class_full_mask_f32(
476 ; CHECK-NEXT: ret i1 true
478 %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1023)
482 define i1 @test_class_undef_no_mask_f32() nounwind {
483 ; CHECK-LABEL: @test_class_undef_no_mask_f32(
484 ; CHECK-NEXT: ret i1 false
486 %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 0)
490 define i1 @test_class_undef_full_mask_f32() nounwind {
491 ; CHECK-LABEL: @test_class_undef_full_mask_f32(
492 ; CHECK-NEXT: ret i1 true
494 %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 1023)
; Undef value with a partial mask (4): the expected fold is undef. With mask 0
; or 1023 the undef value still yields a definite false/true (see
; @test_class_undef_no_mask_f32 and @test_class_undef_full_mask_f32).
498 define i1 @test_class_undef_val_f32() nounwind {
499 ; CHECK-LABEL: @test_class_undef_val_f32(
500 ; CHECK-NEXT: ret i1 undef
502 %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 4)
506 define i1 @test_class_undef_undef_f32() nounwind {
507 ; CHECK-LABEL: @test_class_undef_undef_f32(
508 ; CHECK-NEXT: ret i1 undef
510 %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 undef)
514 define i1 @test_class_var_mask_f32(float %x, i32 %mask) nounwind {
515 ; CHECK-LABEL: @test_class_var_mask_f32(
516 ; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[X:%.*]], i32 [[MASK:%.*]])
517 ; CHECK-NEXT: ret i1 [[VAL]]
519 %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 %mask)
; Mask 3 = 1 | 2 selects both NaN classes (1 = signaling, 2 = quiet, as
; exercised by the test_constant_class_snan_* / _qnan_* tests further down).
; The expected replacement is an unordered compare of %x against 0.0.
523 define i1 @test_class_isnan_f32(float %x) nounwind {
524 ; CHECK-LABEL: @test_class_isnan_f32(
525 ; CHECK-NEXT: [[VAL:%.*]] = fcmp uno float [[X:%.*]], 0.000000e+00
526 ; CHECK-NEXT: ret i1 [[VAL]]
528 %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 3)
; Mask 96 = 32 | 64 selects negative zero and positive zero (see the
; test_constant_class_nzero_* / _pzero_* tests further down). The expected
; replacement is an ordered-equal compare of %x against 0.0.
532 define i1 @test_class_is_p0_n0_f32(float %x) nounwind {
533 ; CHECK-LABEL: @test_class_is_p0_n0_f32(
534 ; CHECK-NEXT: [[VAL:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
535 ; CHECK-NEXT: ret i1 [[VAL]]
537 %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 96)
541 define i1 @test_constant_class_snan_test_snan_f64() nounwind {
542 ; CHECK-LABEL: @test_constant_class_snan_test_snan_f64(
543 ; CHECK-NEXT: ret i1 true
545 %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 1)
549 define i1 @test_constant_class_qnan_test_qnan_f64() nounwind {
550 ; CHECK-LABEL: @test_constant_class_qnan_test_qnan_f64(
551 ; CHECK-NEXT: ret i1 true
553 %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 2)
557 define i1 @test_constant_class_qnan_test_snan_f64() nounwind {
558 ; CHECK-LABEL: @test_constant_class_qnan_test_snan_f64(
559 ; CHECK-NEXT: ret i1 false
561 %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 1)
565 define i1 @test_constant_class_ninf_test_ninf_f64() nounwind {
566 ; CHECK-LABEL: @test_constant_class_ninf_test_ninf_f64(
567 ; CHECK-NEXT: ret i1 true
569 %val = call i1 @llvm.amdgcn.class.f64(double 0xFFF0000000000000, i32 4)
573 define i1 @test_constant_class_pinf_test_ninf_f64() nounwind {
574 ; CHECK-LABEL: @test_constant_class_pinf_test_ninf_f64(
575 ; CHECK-NEXT: ret i1 false
577 %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000000, i32 4)
581 define i1 @test_constant_class_qnan_test_ninf_f64() nounwind {
582 ; CHECK-LABEL: @test_constant_class_qnan_test_ninf_f64(
583 ; CHECK-NEXT: ret i1 false
585 %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 4)
589 define i1 @test_constant_class_snan_test_ninf_f64() nounwind {
590 ; CHECK-LABEL: @test_constant_class_snan_test_ninf_f64(
591 ; CHECK-NEXT: ret i1 false
593 %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 4)
597 define i1 @test_constant_class_nnormal_test_nnormal_f64() nounwind {
598 ; CHECK-LABEL: @test_constant_class_nnormal_test_nnormal_f64(
599 ; CHECK-NEXT: ret i1 true
601 %val = call i1 @llvm.amdgcn.class.f64(double -1.0, i32 8)
605 define i1 @test_constant_class_pnormal_test_nnormal_f64() nounwind {
606 ; CHECK-LABEL: @test_constant_class_pnormal_test_nnormal_f64(
607 ; CHECK-NEXT: ret i1 false
609 %val = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 8)
613 define i1 @test_constant_class_nsubnormal_test_nsubnormal_f64() nounwind {
614 ; CHECK-LABEL: @test_constant_class_nsubnormal_test_nsubnormal_f64(
615 ; CHECK-NEXT: ret i1 true
617 %val = call i1 @llvm.amdgcn.class.f64(double 0x800fffffffffffff, i32 16)
621 define i1 @test_constant_class_psubnormal_test_nsubnormal_f64() nounwind {
622 ; CHECK-LABEL: @test_constant_class_psubnormal_test_nsubnormal_f64(
623 ; CHECK-NEXT: ret i1 false
625 %val = call i1 @llvm.amdgcn.class.f64(double 0x000fffffffffffff, i32 16)
629 define i1 @test_constant_class_nzero_test_nzero_f64() nounwind {
630 ; CHECK-LABEL: @test_constant_class_nzero_test_nzero_f64(
631 ; CHECK-NEXT: ret i1 true
633 %val = call i1 @llvm.amdgcn.class.f64(double -0.0, i32 32)
637 define i1 @test_constant_class_pzero_test_nzero_f64() nounwind {
638 ; CHECK-LABEL: @test_constant_class_pzero_test_nzero_f64(
639 ; CHECK-NEXT: ret i1 false
641 %val = call i1 @llvm.amdgcn.class.f64(double 0.0, i32 32)
645 define i1 @test_constant_class_pzero_test_pzero_f64() nounwind {
646 ; CHECK-LABEL: @test_constant_class_pzero_test_pzero_f64(
647 ; CHECK-NEXT: ret i1 true
649 %val = call i1 @llvm.amdgcn.class.f64(double 0.0, i32 64)
653 define i1 @test_constant_class_nzero_test_pzero_f64() nounwind {
654 ; CHECK-LABEL: @test_constant_class_nzero_test_pzero_f64(
655 ; CHECK-NEXT: ret i1 false
657 %val = call i1 @llvm.amdgcn.class.f64(double -0.0, i32 64)
661 define i1 @test_constant_class_psubnormal_test_psubnormal_f64() nounwind {
662 ; CHECK-LABEL: @test_constant_class_psubnormal_test_psubnormal_f64(
663 ; CHECK-NEXT: ret i1 true
665 %val = call i1 @llvm.amdgcn.class.f64(double 0x000fffffffffffff, i32 128)
669 define i1 @test_constant_class_nsubnormal_test_psubnormal_f64() nounwind {
670 ; CHECK-LABEL: @test_constant_class_nsubnormal_test_psubnormal_f64(
671 ; CHECK-NEXT: ret i1 false
673 %val = call i1 @llvm.amdgcn.class.f64(double 0x800fffffffffffff, i32 128)
677 define i1 @test_constant_class_pnormal_test_pnormal_f64() nounwind {
678 ; CHECK-LABEL: @test_constant_class_pnormal_test_pnormal_f64(
679 ; CHECK-NEXT: ret i1 true
681 %val = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 256)
685 define i1 @test_constant_class_nnormal_test_pnormal_f64() nounwind {
686 ; CHECK-LABEL: @test_constant_class_nnormal_test_pnormal_f64(
687 ; CHECK-NEXT: ret i1 false
689 %val = call i1 @llvm.amdgcn.class.f64(double -1.0, i32 256)
693 define i1 @test_constant_class_pinf_test_pinf_f64() nounwind {
694 ; CHECK-LABEL: @test_constant_class_pinf_test_pinf_f64(
695 ; CHECK-NEXT: ret i1 true
697 %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000000, i32 512)
701 define i1 @test_constant_class_ninf_test_pinf_f64() nounwind {
702 ; CHECK-LABEL: @test_constant_class_ninf_test_pinf_f64(
703 ; CHECK-NEXT: ret i1 false
705 %val = call i1 @llvm.amdgcn.class.f64(double 0xFFF0000000000000, i32 512)
709 define i1 @test_constant_class_qnan_test_pinf_f64() nounwind {
710 ; CHECK-LABEL: @test_constant_class_qnan_test_pinf_f64(
711 ; CHECK-NEXT: ret i1 false
713 %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 512)
717 define i1 @test_constant_class_snan_test_pinf_f64() nounwind {
718 ; CHECK-LABEL: @test_constant_class_snan_test_pinf_f64(
719 ; CHECK-NEXT: ret i1 false
721 %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 512)
725 define i1 @test_class_is_snan_nnan_src(float %x) {
726 ; CHECK-LABEL: @test_class_is_snan_nnan_src(
727 ; CHECK-NEXT: ret i1 false
729 %nnan = fadd nnan float %x, 1.0
730 %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 1)
734 define i1 @test_class_is_qnan_nnan_src(float %x) {
735 ; CHECK-LABEL: @test_class_is_qnan_nnan_src(
736 ; CHECK-NEXT: ret i1 false
738 %nnan = fadd nnan float %x, 1.0
739 %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 2)
743 define i1 @test_class_is_nan_nnan_src(float %x) {
744 ; CHECK-LABEL: @test_class_is_nan_nnan_src(
745 ; CHECK-NEXT: ret i1 false
747 %nnan = fadd nnan float %x, 1.0
748 %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 3)
; The tested value comes from an `fadd nnan`. Mask 267 = 256 | 8 | 2 | 1; the
; expected call uses 264 = 256 | 8, i.e. the two NaN bits (1 and 2) are
; removed while the remaining class bits and the call itself are kept.
752 define i1 @test_class_is_nan_other_nnan_src(float %x) {
753 ; CHECK-LABEL: @test_class_is_nan_other_nnan_src(
754 ; CHECK-NEXT: [[NNAN:%.*]] = fadd nnan float [[X:%.*]], 1.000000e+00
755 ; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[NNAN]], i32 264)
756 ; CHECK-NEXT: ret i1 [[CLASS]]
758 %nnan = fadd nnan float %x, 1.0
759 %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 267)
763 ; --------------------------------------------------------------------
764 ; llvm.amdgcn.cos
765 ; --------------------------------------------------------------------
766 declare float @llvm.amdgcn.cos.f32(float) nounwind readnone
767 declare float @llvm.fabs.f32(float) nounwind readnone
; cos of a negated operand, with the negation written as `fsub -0.0, %x`:
; the expected call takes %x directly, so the negation is dropped.
769 define float @cos_fneg_f32(float %x) {
770 ; CHECK-LABEL: @cos_fneg_f32(
771 ; CHECK-NEXT: [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
772 ; CHECK-NEXT: ret float [[COS]]
774 %x.fneg = fsub float -0.0, %x
775 %cos = call float @llvm.amdgcn.cos.f32(float %x.fneg)
779 define float @cos_unary_fneg_f32(float %x) {
780 ; CHECK-LABEL: @cos_unary_fneg_f32(
781 ; CHECK-NEXT: [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
782 ; CHECK-NEXT: ret float [[COS]]
784 %x.fneg = fneg float %x
785 %cos = call float @llvm.amdgcn.cos.f32(float %x.fneg)
; cos of fabs(%x): the expected output is the cos call on %x directly, with no
; fabs call left in front of it.
789 define float @cos_fabs_f32(float %x) {
790 ; CHECK-LABEL: @cos_fabs_f32(
791 ; CHECK-NEXT: [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
792 ; CHECK-NEXT: ret float [[COS]]
794 %x.fabs = call float @llvm.fabs.f32(float %x)
795 %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs)
799 define float @cos_fabs_fneg_f32(float %x) {
800 ; CHECK-LABEL: @cos_fabs_fneg_f32(
801 ; CHECK-NEXT: [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
802 ; CHECK-NEXT: ret float [[COS]]
804 %x.fabs = call float @llvm.fabs.f32(float %x)
805 %x.fabs.fneg = fsub float -0.0, %x.fabs
806 %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs.fneg)
810 define float @cos_fabs_unary_fneg_f32(float %x) {
811 ; CHECK-LABEL: @cos_fabs_unary_fneg_f32(
812 ; CHECK-NEXT: [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
813 ; CHECK-NEXT: ret float [[COS]]
815 %x.fabs = call float @llvm.fabs.f32(float %x)
816 %x.fabs.fneg = fneg float %x.fabs
817 %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs.fneg)
821 ; --------------------------------------------------------------------
822 ; llvm.amdgcn.cvt.pkrtz
823 ; --------------------------------------------------------------------
825 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone
827 define <2 x half> @vars_lhs_cvt_pkrtz(float %x, float %y) {
828 ; CHECK-LABEL: @vars_lhs_cvt_pkrtz(
829 ; CHECK-NEXT: [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[X:%.*]], float [[Y:%.*]])
830 ; CHECK-NEXT: ret <2 x half> [[CVT]]
832 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
836 define <2 x half> @constant_lhs_cvt_pkrtz(float %y) {
837 ; CHECK-LABEL: @constant_lhs_cvt_pkrtz(
838 ; CHECK-NEXT: [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.000000e+00, float [[Y:%.*]])
839 ; CHECK-NEXT: ret <2 x half> [[CVT]]
841 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.0, float %y)
845 define <2 x half> @constant_rhs_cvt_pkrtz(float %x) {
846 ; CHECK-LABEL: @constant_rhs_cvt_pkrtz(
847 ; CHECK-NEXT: [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[X:%.*]], float 0.000000e+00)
848 ; CHECK-NEXT: ret <2 x half> [[CVT]]
850 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float 0.0)
854 define <2 x half> @undef_lhs_cvt_pkrtz(float %y) {
855 ; CHECK-LABEL: @undef_lhs_cvt_pkrtz(
856 ; CHECK-NEXT: [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float [[Y:%.*]])
857 ; CHECK-NEXT: ret <2 x half> [[CVT]]
859 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %y)
863 define <2 x half> @undef_rhs_cvt_pkrtz(float %x) {
864 ; CHECK-LABEL: @undef_rhs_cvt_pkrtz(
865 ; CHECK-NEXT: [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[X:%.*]], float undef)
866 ; CHECK-NEXT: ret <2 x half> [[CVT]]
868 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float undef)
; Both operands undef: the expected fold is an undef vector. With only one
; operand undef the call is expected to remain (see @undef_lhs_cvt_pkrtz and
; @undef_rhs_cvt_pkrtz).
872 define <2 x half> @undef_cvt_pkrtz() {
873 ; CHECK-LABEL: @undef_cvt_pkrtz(
874 ; CHECK-NEXT: ret <2 x half> undef
876 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float undef)
880 define <2 x half> @constant_splat0_cvt_pkrtz() {
881 ; CHECK-LABEL: @constant_splat0_cvt_pkrtz(
882 ; CHECK-NEXT: ret <2 x half> zeroinitializer
884 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.0, float 0.0)
888 define <2 x half> @constant_cvt_pkrtz() {
889 ; CHECK-LABEL: @constant_cvt_pkrtz(
890 ; CHECK-NEXT: ret <2 x half> <half 0xH4000, half 0xH4400>
892 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 2.0, float 4.0)
896 ; Test constant values where rtz changes result
; Each lane converts 65535.0 and is expected to become 0xH7BFF, which encodes
; 65504.0 = (1 + 1023/1024) * 2^15. The result is therefore below the input,
; i.e. truncated toward zero rather than rounded up.
897 define <2 x half> @constant_rtz_pkrtz() {
898 ; CHECK-LABEL: @constant_rtz_pkrtz(
899 ; CHECK-NEXT: ret <2 x half> <half 0xH7BFF, half 0xH7BFF>
901 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 65535.0, float 65535.0)
905 ; --------------------------------------------------------------------
906 ; llvm.amdgcn.cvt.pknorm.i16
907 ; --------------------------------------------------------------------
909 declare <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float, float) nounwind readnone
911 define <2 x i16> @undef_lhs_cvt_pknorm_i16(float %y) {
912 ; CHECK-LABEL: @undef_lhs_cvt_pknorm_i16(
913 ; CHECK-NEXT: [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float undef, float [[Y:%.*]])
914 ; CHECK-NEXT: ret <2 x i16> [[CVT]]
916 %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float undef, float %y)
920 define <2 x i16> @undef_rhs_cvt_pknorm_i16(float %x) {
921 ; CHECK-LABEL: @undef_rhs_cvt_pknorm_i16(
922 ; CHECK-NEXT: [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float [[X:%.*]], float undef)
923 ; CHECK-NEXT: ret <2 x i16> [[CVT]]
925 %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %x, float undef)
929 define <2 x i16> @undef_cvt_pknorm_i16() {
930 ; CHECK-LABEL: @undef_cvt_pknorm_i16(
931 ; CHECK-NEXT: ret <2 x i16> undef
933 %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float undef, float undef)
937 ; --------------------------------------------------------------------
938 ; llvm.amdgcn.cvt.pknorm.u16
939 ; --------------------------------------------------------------------
941 declare <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float, float) nounwind readnone
943 define <2 x i16> @undef_lhs_cvt_pknorm_u16(float %y) {
944 ; CHECK-LABEL: @undef_lhs_cvt_pknorm_u16(
945 ; CHECK-NEXT: [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float undef, float [[Y:%.*]])
946 ; CHECK-NEXT: ret <2 x i16> [[CVT]]
948 %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float undef, float %y)
952 define <2 x i16> @undef_rhs_cvt_pknorm_u16(float %x) {
953 ; CHECK-LABEL: @undef_rhs_cvt_pknorm_u16(
954 ; CHECK-NEXT: [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float [[X:%.*]], float undef)
955 ; CHECK-NEXT: ret <2 x i16> [[CVT]]
957 %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %x, float undef)
961 define <2 x i16> @undef_cvt_pknorm_u16() {
962 ; CHECK-LABEL: @undef_cvt_pknorm_u16(
963 ; CHECK-NEXT: ret <2 x i16> undef
965 %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float undef, float undef)
969 ; --------------------------------------------------------------------
970 ; llvm.amdgcn.cvt.pk.i16
971 ; --------------------------------------------------------------------
973 declare <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32, i32) nounwind readnone
975 define <2 x i16> @undef_lhs_cvt_pk_i16(i32 %y) {
976 ; CHECK-LABEL: @undef_lhs_cvt_pk_i16(
977 ; CHECK-NEXT: [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 [[Y:%.*]])
978 ; CHECK-NEXT: ret <2 x i16> [[CVT]]
980 %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 %y)
984 define <2 x i16> @undef_rhs_cvt_pk_i16(i32 %x) {
985 ; CHECK-LABEL: @undef_rhs_cvt_pk_i16(
986 ; CHECK-NEXT: [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 [[X:%.*]], i32 undef)
987 ; CHECK-NEXT: ret <2 x i16> [[CVT]]
989 %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %x, i32 undef)
993 define <2 x i16> @undef_cvt_pk_i16() {
994 ; CHECK-LABEL: @undef_cvt_pk_i16(
995 ; CHECK-NEXT: ret <2 x i16> undef
997 %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 undef)
1001 ; --------------------------------------------------------------------
1002 ; llvm.amdgcn.cvt.pk.u16
1003 ; --------------------------------------------------------------------
1005 declare <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32, i32) nounwind readnone
1007 define <2 x i16> @undef_lhs_cvt_pk_u16(i32 %y) {
1008 ; CHECK-LABEL: @undef_lhs_cvt_pk_u16(
1009 ; CHECK-NEXT: [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 undef, i32 [[Y:%.*]])
1010 ; CHECK-NEXT: ret <2 x i16> [[CVT]]
1012 %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 undef, i32 %y)
1016 define <2 x i16> @undef_rhs_cvt_pk_u16(i32 %x) {
1017 ; CHECK-LABEL: @undef_rhs_cvt_pk_u16(
1018 ; CHECK-NEXT: [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 [[X:%.*]], i32 undef)
1019 ; CHECK-NEXT: ret <2 x i16> [[CVT]]
1021 %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %x, i32 undef)
1025 define <2 x i16> @undef_cvt_pk_u16() {
1026 ; CHECK-LABEL: @undef_cvt_pk_u16(
1027 ; CHECK-NEXT: ret <2 x i16> undef
1029 %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 undef, i32 undef)
1033 ; --------------------------------------------------------------------
1034 ; llvm.amdgcn.ubfe
1035 ; --------------------------------------------------------------------
1037 declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) nounwind readnone
1038 declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) nounwind readnone
; Baseline: source, offset and width are all variable, so the ubfe call is
; expected to be left unchanged.
1040 define i32 @ubfe_var_i32(i32 %src, i32 %offset, i32 %width) {
1041 ; CHECK-LABEL: @ubfe_var_i32(
1042 ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 [[WIDTH:%.*]])
1043 ; CHECK-NEXT: ret i32 [[BFE]]
1045 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 %width)
; The constant offset 133 is expected to be rewritten to 5, i.e. only its
; low five bits (133 = 128 + 5) are kept.
1049 define i32 @ubfe_clear_high_bits_constant_offset_i32(i32 %src, i32 %width) {
1050 ; CHECK-LABEL: @ubfe_clear_high_bits_constant_offset_i32(
1051 ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 5, i32 [[WIDTH:%.*]])
1052 ; CHECK-NEXT: ret i32 [[BFE]]
1054 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 133, i32 %width)
; The constant width 133 is expected to be rewritten to 5, i.e. only its
; low five bits (133 = 128 + 5) are kept.
1058 define i32 @ubfe_clear_high_bits_constant_width_i32(i32 %src, i32 %offset) {
1059 ; CHECK-LABEL: @ubfe_clear_high_bits_constant_width_i32(
1060 ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 5)
1061 ; CHECK-NEXT: ret i32 [[BFE]]
1063 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 133)
; A constant width of 0 is expected to fold the whole extract to 0,
; regardless of the variable offset.
1067 define i32 @ubfe_width_0(i32 %src, i32 %offset) {
1068 ; CHECK-LABEL: @ubfe_width_0(
1069 ; CHECK-NEXT: ret i32 0
1071 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 0)
; A constant width of 31 with a variable offset: the call is expected to be
; left unchanged.
1075 define i32 @ubfe_width_31(i32 %src, i32 %offset) {
1076 ; CHECK-LABEL: @ubfe_width_31(
1077 ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 31)
1078 ; CHECK-NEXT: ret i32 [[BFE]]
1080 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 31)
; A constant width of 32 is expected to fold to 0, the same result as the
; width-0 case (the low five bits of 32 are zero).
1084 define i32 @ubfe_width_32(i32 %src, i32 %offset) {
1085 ; CHECK-LABEL: @ubfe_width_32(
1086 ; CHECK-NEXT: ret i32 0
1088 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 32)
; A constant width of 33 is expected to be rewritten to 1 (its low five
; bits); the call itself is kept.
1092 define i32 @ubfe_width_33(i32 %src, i32 %offset) {
1093 ; CHECK-LABEL: @ubfe_width_33(
1094 ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 1)
1095 ; CHECK-NEXT: ret i32 [[BFE]]
1097 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 33)
; A constant offset of 33 is expected to be rewritten to 1 (its low five
; bits); the call itself is kept.
1101 define i32 @ubfe_offset_33(i32 %src, i32 %width) {
1102 ; CHECK-LABEL: @ubfe_offset_33(
1103 ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 1, i32 [[WIDTH:%.*]])
1104 ; CHECK-NEXT: ret i32 [[BFE]]
1106 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 33, i32 %width)
; A constant offset of 0 with a variable width: the call is expected to be
; left unchanged.
1110 define i32 @ubfe_offset_0(i32 %src, i32 %width) {
1111 ; CHECK-LABEL: @ubfe_offset_0(
1112 ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 0, i32 [[WIDTH:%.*]])
1113 ; CHECK-NEXT: ret i32 [[BFE]]
1115 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 %width)
; A constant offset of 32 is expected to be rewritten to 0 (its low five
; bits); the call itself is kept because the width is variable.
1119 define i32 @ubfe_offset_32(i32 %src, i32 %width) {
1120 ; CHECK-LABEL: @ubfe_offset_32(
1121 ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 0, i32 [[WIDTH:%.*]])
1122 ; CHECK-NEXT: ret i32 [[BFE]]
1124 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 32, i32 %width)
; A constant offset of 31 with a variable width: the call is expected to be
; left unchanged.
1128 define i32 @ubfe_offset_31(i32 %src, i32 %width) {
1129 ; CHECK-LABEL: @ubfe_offset_31(
1130 ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 31, i32 [[WIDTH:%.*]])
1131 ; CHECK-NEXT: ret i32 [[BFE]]
1133 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 31, i32 %width)
; Constant offset 0 and width 0: the extract is expected to fold to 0.
1137 define i32 @ubfe_offset_0_width_0(i32 %src) {
1138 ; CHECK-LABEL: @ubfe_offset_0_width_0(
1139 ; CHECK-NEXT: ret i32 0
1141 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 0)
; Constant offset 0 and width 3: the call is expected to be replaced by a
; plain `and` with the mask 7; no shift is needed.
1145 define i32 @ubfe_offset_0_width_3(i32 %src) {
1146 ; CHECK-LABEL: @ubfe_offset_0_width_3(
1147 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SRC:%.*]], 7
1148 ; CHECK-NEXT: ret i32 [[TMP1]]
1150 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 3)
; Constant offset 3 and width 1: the call is expected to be replaced by a
; logical shift right by 3 followed by an `and` with 1.
1154 define i32 @ubfe_offset_3_width_1(i32 %src) {
1155 ; CHECK-LABEL: @ubfe_offset_3_width_1(
1156 ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[SRC:%.*]], 3
1157 ; CHECK-NEXT: [[BFE:%.*]] = and i32 [[TMP1]], 1
1158 ; CHECK-NEXT: ret i32 [[BFE]]
1160 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 3, i32 1)
; Constant offset 3 and width 4: the call is expected to be replaced by a
; logical shift right by 3 followed by an `and` with 15.
1164 define i32 @ubfe_offset_3_width_4(i32 %src) {
1165 ; CHECK-LABEL: @ubfe_offset_3_width_4(
1166 ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[SRC:%.*]], 3
1167 ; CHECK-NEXT: [[BFE:%.*]] = and i32 [[TMP1]], 15
1168 ; CHECK-NEXT: ret i32 [[BFE]]
1170 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 3, i32 4)
; All three operands are the constant 0: the call is expected to fold to 0.
1174 define i32 @ubfe_0_0_0() {
1175 ; CHECK-LABEL: @ubfe_0_0_0(
1176 ; CHECK-NEXT: ret i32 0
1178 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
; Fully constant unsigned extract of 7 bits at offset 5 from -1 (all ones):
; expected to fold to 127, i.e. seven one bits zero-extended.
1182 define i32 @ubfe_neg1_5_7() {
1183 ; CHECK-LABEL: @ubfe_neg1_5_7(
1184 ; CHECK-NEXT: ret i32 127
1186 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 -1, i32 5, i32 7)
; An undef source operand is expected to fold the whole call to undef, even
; with variable offset and width.
1190 define i32 @ubfe_undef_src_i32(i32 %offset, i32 %width) {
1191 ; CHECK-LABEL: @ubfe_undef_src_i32(
1192 ; CHECK-NEXT: ret i32 undef
1194 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 undef, i32 %offset, i32 %width)
; An undef offset operand does not trigger a fold: the call is expected to
; be left unchanged.
1198 define i32 @ubfe_undef_offset_i32(i32 %src, i32 %width) {
1199 ; CHECK-LABEL: @ubfe_undef_offset_i32(
1200 ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 undef, i32 [[WIDTH:%.*]])
1201 ; CHECK-NEXT: ret i32 [[BFE]]
1203 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 undef, i32 %width)
; An undef width operand does not trigger a fold: the call is expected to
; be left unchanged.
1207 define i32 @ubfe_undef_width_i32(i32 %src, i32 %offset) {
1208 ; CHECK-LABEL: @ubfe_undef_width_i32(
1209 ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 undef)
1210 ; CHECK-NEXT: ret i32 [[BFE]]
1212 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 undef)
; 64-bit variant: offset 33 is kept as-is (not reduced as in the i32 tests)
; and the call is expected to become lshr by 33 followed by `and` with 15.
1216 define i64 @ubfe_offset_33_width_4_i64(i64 %src) {
1217 ; CHECK-LABEL: @ubfe_offset_33_width_4_i64(
1218 ; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[SRC:%.*]], 33
1219 ; CHECK-NEXT: [[BFE:%.*]] = and i64 [[TMP1]], 15
1220 ; CHECK-NEXT: ret i64 [[BFE]]
1222 %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 33, i32 4)
; 64-bit variant with constant offset 0 and variable width: the call is
; expected to be left unchanged.
1226 define i64 @ubfe_offset_0_i64(i64 %src, i32 %width) {
1227 ; CHECK-LABEL: @ubfe_offset_0_i64(
1228 ; CHECK-NEXT: [[BFE:%.*]] = call i64 @llvm.amdgcn.ubfe.i64(i64 [[SRC:%.*]], i32 0, i32 [[WIDTH:%.*]])
1229 ; CHECK-NEXT: ret i64 [[BFE]]
1231 %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 0, i32 %width)
; 64-bit extract of the upper 32 bits (offset 32, width 32): expected to
; become a single lshr by 32, with no mask needed.
1235 define i64 @ubfe_offset_32_width_32_i64(i64 %src) {
1236 ; CHECK-LABEL: @ubfe_offset_32_width_32_i64(
1237 ; CHECK-NEXT: [[BFE:%.*]] = lshr i64 [[SRC:%.*]], 32
1238 ; CHECK-NEXT: ret i64 [[BFE]]
1240 %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 32, i32 32)
1244 ; --------------------------------------------------------------------
1245 ; llvm.amdgcn.sbfe
1246 ; --------------------------------------------------------------------
1248 declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) nounwind readnone
1249 declare i64 @llvm.amdgcn.sbfe.i64(i64, i32, i32) nounwind readnone
; Signed extract with constant offset 31 and variable width: the call is
; expected to be left unchanged.
1251 define i32 @sbfe_offset_31(i32 %src, i32 %width) {
1252 ; CHECK-LABEL: @sbfe_offset_31(
1253 ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.sbfe.i32(i32 [[SRC:%.*]], i32 31, i32 [[WIDTH:%.*]])
1254 ; CHECK-NEXT: ret i32 [[BFE]]
1256 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 31, i32 %width)
; Fully constant signed extract of 7 bits at offset 5 from -1: expected to
; fold to -1 (the unsigned counterpart, ubfe_neg1_5_7, expects 127).
1260 define i32 @sbfe_neg1_5_7() {
1261 ; CHECK-LABEL: @sbfe_neg1_5_7(
1262 ; CHECK-NEXT: ret i32 -1
1264 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 -1, i32 5, i32 7)
; 64-bit signed extract of the upper 32 bits (offset 32, width 32):
; expected to become a single ashr by 32.
1268 define i64 @sbfe_offset_32_width_32_i64(i64 %src) {
1269 ; CHECK-LABEL: @sbfe_offset_32_width_32_i64(
1270 ; CHECK-NEXT: [[BFE:%.*]] = ashr i64 [[SRC:%.*]], 32
1271 ; CHECK-NEXT: ret i64 [[BFE]]
1273 %bfe = call i64 @llvm.amdgcn.sbfe.i64(i64 %src, i32 32, i32 32)
1277 ; --------------------------------------------------------------------
1278 ; llvm.amdgcn.exp
1279 ; --------------------------------------------------------------------
1281 declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) nounwind inaccessiblememonly
; The second operand of each exp call acts as a per-source enable mask.
; Per the CHECK lines, bit 0 keeps the first float source, bit 1 the second,
; bit 2 the third and bit 3 the fourth; every source whose bit is clear is
; expected to be replaced with undef. Masks 1, 2, 4, 8 are exercised with
; constant and with variable sources, mask 0 drops all four sources, and
; masks 3, 5, 9 and 15 cover multi-source combinations. The two trailing
; i1 operands are expected to pass through unchanged.
1286 define void @exp_disabled_inputs_to_undef(float %x, float %y, float %z, float %w) {
1287 ; enable src0..src3 constants
1288 ; CHECK-LABEL: @exp_disabled_inputs_to_undef(
1289 ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.000000e+00, float undef, float undef, float undef, i1 true, i1 false)
1290 ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float undef, float 2.000000e+00, float undef, float undef, i1 true, i1 false)
1291 ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float undef, float undef, float 5.000000e-01, float undef, i1 true, i1 false)
1292 ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float undef, float undef, float undef, float 4.000000e+00, i1 true, i1 false)
1293 ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float [[X:%.*]], float undef, float undef, float undef, i1 true, i1 false)
1294 ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float undef, float [[Y:%.*]], float undef, float undef, i1 true, i1 false)
1295 ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float undef, float undef, float [[Z:%.*]], float undef, i1 true, i1 false)
1296 ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float undef, float undef, float undef, float [[W:%.*]], i1 true, i1 false)
1297 ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float undef, float undef, float undef, float undef, i1 true, i1 false)
1298 ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.000000e+00, float 2.000000e+00, float undef, float undef, i1 true, i1 false)
1299 ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.000000e+00, float undef, float 5.000000e-01, float undef, i1 true, i1 false)
1300 ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.000000e+00, float undef, float undef, float 4.000000e+00, i1 false, i1 false)
1301 ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 false, i1 false)
1302 ; CHECK-NEXT: ret void
1304 call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
1305 call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
1306 call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
1307 call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
1309 ; enable src0..src3 variables
1310 call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float %x, float %y, float %z, float %w, i1 true, i1 false)
1311 call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %x, float %y, float %z, float %w, i1 true, i1 false)
1312 call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float %x, float %y, float %z, float %w, i1 true, i1 false)
1313 call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float %x, float %y, float %z, float %w, i1 true, i1 false)
1316 call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float %x, float %y, float %z, float %w, i1 true, i1 false)
1318 ; enable different source combinations
1319 call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
1320 call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
1321 call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
1322 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
1327 ; --------------------------------------------------------------------
1328 ; llvm.amdgcn.exp.compr
1329 ; --------------------------------------------------------------------
1331 declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) nounwind inaccessiblememonly
; Compressed export: each <2 x half> source carries two components. Per the
; CHECK lines, enable masks 1, 2 and 3 keep only the first vector source,
; mask 12 keeps only the second, mask 15 keeps both and mask 0 keeps
; neither; a dropped source is expected to be replaced with undef. The
; first four calls use constant sources, the remaining six use %xy / %zw.
1335 define void @exp_compr_disabled_inputs_to_undef(<2 x half> %xy, <2 x half> %zw) {
1336 ; CHECK-LABEL: @exp_compr_disabled_inputs_to_undef(
1337 ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
1338 ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
1339 ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
1340 ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
1341 ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
1342 ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> [[XY:%.*]], <2 x half> undef, i1 true, i1 false)
1343 ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> [[XY]], <2 x half> undef, i1 true, i1 false)
1344 ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> [[XY]], <2 x half> undef, i1 true, i1 false)
1345 ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 12, <2 x half> undef, <2 x half> [[ZW:%.*]], i1 true, i1 false)
1346 ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> [[XY]], <2 x half> [[ZW]], i1 true, i1 false)
1347 ; CHECK-NEXT: ret void
1349 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
1350 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
1351 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
1352 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
1354 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
1355 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
1356 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
1357 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
1359 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 12, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
1360 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
1364 ; --------------------------------------------------------------------
1365 ; llvm.amdgcn.fmed3
1366 ; --------------------------------------------------------------------
1368 declare float @llvm.amdgcn.fmed3.f32(float, float, float) nounwind readnone
; Baseline: three variable operands, so the fmed3 call is expected to be
; left unchanged.
1370 define float @fmed3_f32(float %x, float %y, float %z) {
1371 ; CHECK-LABEL: @fmed3_f32(
1372 ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]])
1373 ; CHECK-NEXT: ret float [[MED3]]
1375 %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z)
; Operands are already in (variable, constant, constant) order: the call is
; expected to be left unchanged.
1379 define float @fmed3_canonicalize_x_c0_c1_f32(float %x) {
1380 ; CHECK-LABEL: @fmed3_canonicalize_x_c0_c1_f32(
1381 ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float 0.000000e+00, float 1.000000e+00)
1382 ; CHECK-NEXT: ret float [[MED3]]
1384 %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0.0, float 1.0)
; (constant, variable, constant) input: the operands are expected to be
; reordered so the variable comes first, giving fmed3(x, 0.0, 1.0).
1388 define float @fmed3_canonicalize_c0_x_c1_f32(float %x) {
1389 ; CHECK-LABEL: @fmed3_canonicalize_c0_x_c1_f32(
1390 ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float 0.000000e+00, float 1.000000e+00)
1391 ; CHECK-NEXT: ret float [[MED3]]
1393 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %x, float 1.0)
; (constant, constant, variable) input: the operands are expected to be
; reordered so the variable comes first, giving fmed3(x, 0.0, 1.0).
1397 define float @fmed3_canonicalize_c0_c1_x_f32(float %x) {
1398 ; CHECK-LABEL: @fmed3_canonicalize_c0_c1_x_f32(
1399 ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float 0.000000e+00, float 1.000000e+00)
1400 ; CHECK-NEXT: ret float [[MED3]]
1402 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %x)
; Two variables followed by a constant: already in the expected order, so
; the call is left unchanged.
1406 define float @fmed3_canonicalize_x_y_c_f32(float %x, float %y) {
1407 ; CHECK-LABEL: @fmed3_canonicalize_x_y_c_f32(
1408 ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float [[Y:%.*]], float 1.000000e+00)
1409 ; CHECK-NEXT: ret float [[MED3]]
1411 %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.0)
; Constant in the middle position: it is expected to be moved to the last
; operand, giving fmed3(x, y, 1.0).
1415 define float @fmed3_canonicalize_x_c_y_f32(float %x, float %y) {
1416 ; CHECK-LABEL: @fmed3_canonicalize_x_c_y_f32(
1417 ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float [[Y:%.*]], float 1.000000e+00)
1418 ; CHECK-NEXT: ret float [[MED3]]
1420 %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 1.0, float %y)
; Constant in the first position: it is expected to be moved to the last
; operand, giving fmed3(x, y, 1.0).
1424 define float @fmed3_canonicalize_c_x_y_f32(float %x, float %y) {
1425 ; CHECK-LABEL: @fmed3_canonicalize_c_x_y_f32(
1426 ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float [[Y:%.*]], float 1.000000e+00)
1427 ; CHECK-NEXT: ret float [[MED3]]
1429 %med3 = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %x, float %y)
; undef as the first operand: the call is expected to be replaced by
; llvm.minnum of the two remaining operands.
1433 define float @fmed3_undef_x_y_f32(float %x, float %y) {
1434 ; CHECK-LABEL: @fmed3_undef_x_y_f32(
1435 ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
1436 ; CHECK-NEXT: ret float [[MED3]]
1438 %med3 = call float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y)
; Same as the plain undef-first case, but the call carries the nnan
; fast-math flag; the flag is expected to be carried over to the minnum call.
1442 define float @fmed3_fmf_undef_x_y_f32(float %x, float %y) {
1443 ; CHECK-LABEL: @fmed3_fmf_undef_x_y_f32(
1444 ; CHECK-NEXT: [[MED3:%.*]] = call nnan float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
1445 ; CHECK-NEXT: ret float [[MED3]]
1447 %med3 = call nnan float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y)
; undef as the second operand: the call is expected to be replaced by
; llvm.minnum of the two remaining operands.
1451 define float @fmed3_x_undef_y_f32(float %x, float %y) {
1452 ; CHECK-LABEL: @fmed3_x_undef_y_f32(
1453 ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
1454 ; CHECK-NEXT: ret float [[MED3]]
1456 %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float undef, float %y)
; undef as the third operand: unlike the first/second-operand cases, the
; call is expected to be replaced by llvm.maxnum of the remaining operands.
1460 define float @fmed3_x_y_undef_f32(float %x, float %y) {
1461 ; CHECK-LABEL: @fmed3_x_y_undef_f32(
1462 ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
1463 ; CHECK-NEXT: ret float [[MED3]]
1465 %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef)
; Quiet NaN constant as the first operand: the call is expected to be
; replaced by llvm.minnum of the two remaining operands.
1469 define float @fmed3_qnan0_x_y_f32(float %x, float %y) {
1470 ; CHECK-LABEL: @fmed3_qnan0_x_y_f32(
1471 ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
1472 ; CHECK-NEXT: ret float [[MED3]]
1474 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000000000000, float %x, float %y)
; Quiet NaN constant as the second operand: the call is expected to be
; replaced by llvm.minnum of the two remaining operands.
1478 define float @fmed3_x_qnan0_y_f32(float %x, float %y) {
1479 ; CHECK-LABEL: @fmed3_x_qnan0_y_f32(
1480 ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
1481 ; CHECK-NEXT: ret float [[MED3]]
1483 %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8000000000000, float %y)
; Quiet NaN constant as the third operand: the call is expected to be
; replaced by llvm.maxnum of the two remaining operands.
1487 define float @fmed3_x_y_qnan0_f32(float %x, float %y) {
1488 ; CHECK-LABEL: @fmed3_x_y_qnan0_f32(
1489 ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
1490 ; CHECK-NEXT: ret float [[MED3]]
1492 %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000)
; Same as fmed3_qnan0_x_y_f32 but with a different NaN bit pattern
; (0x7FF8000100000000) as the first operand; minnum(x, y) is still expected.
1496 define float @fmed3_qnan1_x_y_f32(float %x, float %y) {
1497 ; CHECK-LABEL: @fmed3_qnan1_x_y_f32(
1498 ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
1499 ; CHECK-NEXT: ret float [[MED3]]
1501 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float %x, float %y)
1505 ; This can return any of the qnans.
; The CHECK line pins the third operand's value (0x7FF8030000000000) as the
; folded result.
; NOTE(review): %x and %y are not used by the visible call.
1506 define float @fmed3_qnan0_qnan1_qnan2_f32(float %x, float %y) {
1507 ; CHECK-LABEL: @fmed3_qnan0_qnan1_qnan2_f32(
1508 ; CHECK-NEXT: ret float 0x7FF8030000000000
1510 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float 0x7FF8002000000000, float 0x7FF8030000000000)
; All-constant fmed3(0.5, -1.0, 4.0): expected to fold to the median, 0.5
; (median supplied as the first operand).
; NOTE(review): %x and %y are not used by the visible call.
1514 define float @fmed3_constant_src0_0_f32(float %x, float %y) {
1515 ; CHECK-LABEL: @fmed3_constant_src0_0_f32(
1516 ; CHECK-NEXT: ret float 5.000000e-01
1518 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float -1.0, float 4.0)
; All-constant fmed3(0.5, 4.0, -1.0): expected to fold to the median, 0.5
; (median first, other two operands swapped versus the _0 variant).
; NOTE(review): %x and %y are not used by the visible call.
1522 define float @fmed3_constant_src0_1_f32(float %x, float %y) {
1523 ; CHECK-LABEL: @fmed3_constant_src0_1_f32(
1524 ; CHECK-NEXT: ret float 5.000000e-01
1526 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float 4.0, float -1.0)
; All-constant fmed3(-1.0, 0.5, 4.0): expected to fold to the median, 0.5
; (median supplied as the second operand).
; NOTE(review): %x and %y are not used by the visible call.
1530 define float @fmed3_constant_src1_0_f32(float %x, float %y) {
1531 ; CHECK-LABEL: @fmed3_constant_src1_0_f32(
1532 ; CHECK-NEXT: ret float 5.000000e-01
1534 %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 0.5, float 4.0)
; All-constant fmed3(4.0, 0.5, -1.0): expected to fold to the median, 0.5
; (median second, other two operands swapped versus the _0 variant).
; NOTE(review): %x and %y are not used by the visible call.
1538 define float @fmed3_constant_src1_1_f32(float %x, float %y) {
1539 ; CHECK-LABEL: @fmed3_constant_src1_1_f32(
1540 ; CHECK-NEXT: ret float 5.000000e-01
1542 %med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float 0.5, float -1.0)
; All-constant fmed3(-1.0, 4.0, 0.5): expected to fold to the median, 0.5
; (median supplied as the third operand).
; NOTE(review): %x and %y are not used by the visible call.
1546 define float @fmed3_constant_src2_0_f32(float %x, float %y) {
1547 ; CHECK-LABEL: @fmed3_constant_src2_0_f32(
1548 ; CHECK-NEXT: ret float 5.000000e-01
1550 %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 4.0, float 0.5)
; All-constant fmed3(4.0, -1.0, 0.5): expected to fold to the median, 0.5
; (median third, other two operands swapped versus the _0 variant).
; NOTE(review): %x and %y are not used by the visible call.
1554 define float @fmed3_constant_src2_1_f32(float %x, float %y) {
1555 ; CHECK-LABEL: @fmed3_constant_src2_1_f32(
1556 ; CHECK-NEXT: ret float 5.000000e-01
1558 %med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float -1.0, float 0.5)
; Two quiet NaN constants in the second and third positions: the call is
; expected to fold to the only non-NaN operand, %x.
1562 define float @fmed3_x_qnan0_qnan1_f32(float %x) {
1563 ; CHECK-LABEL: @fmed3_x_qnan0_qnan1_f32(
1564 ; CHECK-NEXT: ret float [[X:%.*]]
1566 %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8001000000000, float 0x7FF8002000000000)
; Two quiet NaN constants in the first and third positions: the call is
; expected to fold to the only non-NaN operand, %x.
1570 define float @fmed3_qnan0_x_qnan1_f32(float %x) {
1571 ; CHECK-LABEL: @fmed3_qnan0_x_qnan1_f32(
1572 ; CHECK-NEXT: ret float [[X:%.*]]
1574 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float %x, float 0x7FF8002000000000)
; Two quiet NaN constants in the first and second positions: the call is
; expected to fold to the only non-NaN operand, %x.
1578 define float @fmed3_qnan0_qnan1_x_f32(float %x) {
1579 ; CHECK-LABEL: @fmed3_qnan0_qnan1_x_f32(
1580 ; CHECK-NEXT: ret float [[X:%.*]]
1582 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0x7FF8002000000000, float %x)
; Constant fold with a NaN first operand: fmed3(NaN, 0.0, 1.0) is expected
; to yield 0.0, matching the minnum rule of the variable-operand tests.
1586 define float @fmed3_nan_0_1_f32() {
1587 ; CHECK-LABEL: @fmed3_nan_0_1_f32(
1588 ; CHECK-NEXT: ret float 0.000000e+00
1590 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0.0, float 1.0)
; Constant fold with a NaN second operand: fmed3(0.0, NaN, 1.0) is expected
; to yield 0.0, matching the minnum rule of the variable-operand tests.
1594 define float @fmed3_0_nan_1_f32() {
1595 ; CHECK-LABEL: @fmed3_0_nan_1_f32(
1596 ; CHECK-NEXT: ret float 0.000000e+00
1598 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 0x7FF8001000000000, float 1.0)
; Constant fold with a NaN third operand: fmed3(0.0, 1.0, NaN) is expected
; to yield 1.0, matching the maxnum rule of the variable-operand tests.
1602 define float @fmed3_0_1_nan_f32() {
1603 ; CHECK-LABEL: @fmed3_0_1_nan_f32(
1604 ; CHECK-NEXT: ret float 1.000000e+00
1606 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8001000000000)
; Constant fold with an undef first operand: fmed3(undef, 0.0, 1.0) is
; expected to yield 0.0, the same result as the NaN-first case.
1610 define float @fmed3_undef_0_1_f32() {
1611 ; CHECK-LABEL: @fmed3_undef_0_1_f32(
1612 ; CHECK-NEXT: ret float 0.000000e+00
1614 %med3 = call float @llvm.amdgcn.fmed3.f32(float undef, float 0.0, float 1.0)
; Constant fold with an undef second operand: fmed3(0.0, undef, 1.0) is
; expected to yield 0.0, the same result as the NaN-second case.
1618 define float @fmed3_0_undef_1_f32() {
1619 ; CHECK-LABEL: @fmed3_0_undef_1_f32(
1620 ; CHECK-NEXT: ret float 0.000000e+00
1622 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float undef, float 1.0)
; Constant fold with an undef third operand: fmed3(0.0, 1.0, undef) is
; expected to yield 1.0, the same result as the NaN-third case.
1626 define float @fmed3_0_1_undef_f32() {
1627 ; CHECK-LABEL: @fmed3_0_1_undef_f32(
1628 ; CHECK-NEXT: ret float 1.000000e+00
1630 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float undef)
1634 ; --------------------------------------------------------------------
1635 ; llvm.amdgcn.icmp
1636 ; --------------------------------------------------------------------
1638 declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32 immarg) nounwind readnone convergent
1639 declare i64 @llvm.amdgcn.icmp.i64.i64(i64, i64, i32 immarg) nounwind readnone convergent
1640 declare i64 @llvm.amdgcn.icmp.i64.i1(i1, i1, i32 immarg) nounwind readnone convergent
; Predicate codes 31 and 42 lie just outside the 32..41 range exercised by
; the other icmp tests in this file; both calls are expected to be left
; untouched rather than folded.
1642 define i64 @invalid_icmp_code(i32 %a, i32 %b) {
1643 ; CHECK-LABEL: @invalid_icmp_code(
1644 ; CHECK-NEXT: [[UNDER:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 31)
1645 ; CHECK-NEXT: [[OVER:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A]], i32 [[B]], i32 42)
1646 ; CHECK-NEXT: [[OR:%.*]] = or i64 [[UNDER]], [[OVER]]
1647 ; CHECK-NEXT: ret i64 [[OR]]
1649 %under = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 31)
1650 %over = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 42)
1651 %or = or i64 %under, %over
; Constant operands 9 and 8 compared with code 32 (the code produced for
; `icmp eq` elsewhere in this file): the comparison is false, so the call is
; expected to fold to 0.
1655 define i64 @icmp_constant_inputs_false() {
1656 ; CHECK-LABEL: @icmp_constant_inputs_false(
1657 ; CHECK-NEXT: ret i64 0
1659 %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 32)
; Constant operands 9 and 8 compared with code 34 (the code produced for
; `icmp ugt` elsewhere in this file): the comparison is true, and the call
; is expected to become llvm.read_register.i64 on metadata !0.
; NOTE(review): the register named by !0 is not visible in this excerpt.
1663 define i64 @icmp_constant_inputs_true() {
1664 ; CHECK-LABEL: @icmp_constant_inputs_true(
1665 ; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata !0) [[ATTR12:#.*]]
1666 ; CHECK-NEXT: ret i64 [[RESULT]]
1668 %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34)
; Constant on the left-hand side: the operands are expected to be swapped so
; the constant is on the right, with the predicate code changing from 40 to
; 38 (presumably slt -> sgt, per the test name).
1672 define i64 @icmp_constant_to_rhs_slt(i32 %x) {
1673 ; CHECK-LABEL: @icmp_constant_to_rhs_slt(
1674 ; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[X:%.*]], i32 9, i32 38)
1675 ; CHECK-NEXT: ret i64 [[RESULT]]
1677 %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 %x, i32 40)
; amdgcn.icmp(zext(icmp eq a, b), 0, 33): the zext-and-compare-against-zero
; wrapper is expected to be folded away, leaving amdgcn.icmp(a, b, 32).
1681 define i64 @fold_icmp_ne_0_zext_icmp_eq_i32(i32 %a, i32 %b) {
1682 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i32(
1683 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32)
1684 ; CHECK-NEXT: ret i64 [[MASK]]
1686 %cmp = icmp eq i32 %a, %b
1687 %zext.cmp = zext i1 %cmp to i32
1688 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
; Same fold as the eq variant, starting from `icmp ne`: the expected result
; is amdgcn.icmp(a, b, 33).
1692 define i64 @fold_icmp_ne_0_zext_icmp_ne_i32(i32 %a, i32 %b) {
1693 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ne_i32(
1694 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 33)
1695 ; CHECK-NEXT: ret i64 [[MASK]]
1697 %cmp = icmp ne i32 %a, %b
1698 %zext.cmp = zext i1 %cmp to i32
1699 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
; Same fold starting from `icmp sle`: the expected result is
; amdgcn.icmp(a, b, 41).
1703 define i64 @fold_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) {
1704 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_sle_i32(
1705 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 41)
1706 ; CHECK-NEXT: ret i64 [[MASK]]
1708 %cmp = icmp sle i32 %a, %b
1709 %zext.cmp = zext i1 %cmp to i32
1710 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
; Same fold starting from a 64-bit `icmp ugt`: the result is expected to
; switch to the i64-operand intrinsic variant, amdgcn.icmp.i64.i64(a, b, 34).
1714 define i64 @fold_icmp_ne_0_zext_icmp_ugt_i64(i64 %a, i64 %b) {
1715 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ugt_i64(
1716 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]], i32 34)
1717 ; CHECK-NEXT: ret i64 [[MASK]]
1719 %cmp = icmp ugt i64 %a, %b
1720 %zext.cmp = zext i1 %cmp to i32
1721 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
; Swapped-operand form: the zero constant is the first intrinsic operand and
; the zext'd compare the second. The expected result is the same as for the
; non-swapped ugt test, amdgcn.icmp.i64.i64(a, b, 34).
; NOTE(review): the name says "ult" but the compare in the body is
; `icmp ugt`; confirm which one was intended.
1725 define i64 @fold_icmp_ne_0_zext_icmp_ult_swap_i64(i64 %a, i64 %b) {
1726 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_swap_i64(
1727 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]], i32 34)
1728 ; CHECK-NEXT: ret i64 [[MASK]]
1730 %cmp = icmp ugt i64 %a, %b
1731 %zext.cmp = zext i1 %cmp to i32
1732 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 0, i32 %zext.cmp, i32 33)
; The inner compare is a floating-point `fcmp oeq`: the fold is expected to
; switch to the fcmp intrinsic, amdgcn.fcmp.i64.f32(a, b, 1).
1736 define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f32(float %a, float %b) {
1737 ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f32(
1738 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 1)
1739 ; CHECK-NEXT: ret i64 [[MASK]]
1741 %cmp = fcmp oeq float %a, %b
1742 %zext.cmp = zext i1 %cmp to i32
1743 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
; Same as the fcmp oeq variant, starting from `fcmp une`: the expected
; result is amdgcn.fcmp.i64.f32(a, b, 14).
1747 define i64 @fold_icmp_ne_0_zext_fcmp_une_f32(float %a, float %b) {
1748 ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_une_f32(
1749 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 14)
1750 ; CHECK-NEXT: ret i64 [[MASK]]
1752 %cmp = fcmp une float %a, %b
1753 %zext.cmp = zext i1 %cmp to i32
1754 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
; Double-precision `fcmp olt`: the fold is expected to produce the f64
; intrinsic variant, amdgcn.fcmp.i64.f64(a, b, 4).
1758 define i64 @fold_icmp_ne_0_zext_fcmp_olt_f64(double %a, double %b) {
1759 ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_olt_f64(
1760 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f64(double [[A:%.*]], double [[B:%.*]], i32 4)
1761 ; CHECK-NEXT: ret i64 [[MASK]]
1763 %cmp = fcmp olt double %a, %b
1764 %zext.cmp = zext i1 %cmp to i32
1765 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
; The compare result is sign-extended instead of zero-extended; comparing it
; against 0 with code 33 is still expected to fold to amdgcn.icmp(a, b, 32).
1769 define i64 @fold_icmp_sext_icmp_ne_0_i32(i32 %a, i32 %b) {
1770 ; CHECK-LABEL: @fold_icmp_sext_icmp_ne_0_i32(
1771 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32)
1772 ; CHECK-NEXT: ret i64 [[MASK]]
1774 %cmp = icmp eq i32 %a, %b
1775 %sext.cmp = sext i1 %cmp to i32
1776 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 0, i32 33)
; The outer comparison uses code 32 (equal to 0), which inverts the inner
; predicate: `icmp eq` is expected to become amdgcn.icmp(a, b, 33).
1780 define i64 @fold_icmp_eq_0_zext_icmp_eq_i32(i32 %a, i32 %b) {
1781 ; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_eq_i32(
1782 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 33)
1783 ; CHECK-NEXT: ret i64 [[MASK]]
1785 %cmp = icmp eq i32 %a, %b
1786 %zext.cmp = zext i1 %cmp to i32
1787 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32)
; Outer compare-equal-to-0 over `icmp slt`: the inner predicate is expected
; to be inverted, giving amdgcn.icmp(a, b, 39) (presumably sge).
1791 define i64 @fold_icmp_eq_0_zext_icmp_slt_i32(i32 %a, i32 %b) {
1792 ; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_slt_i32(
1793 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 39)
1794 ; CHECK-NEXT: ret i64 [[MASK]]
1796 %cmp = icmp slt i32 %a, %b
1797 %zext.cmp = zext i1 %cmp to i32
1798 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32)
; Outer compare-equal-to-0 over `fcmp oeq`: the inner predicate is expected
; to be inverted, giving amdgcn.fcmp(a, b, 14), the code `fcmp une` maps to
; elsewhere in this file.
1802 define i64 @fold_icmp_eq_0_zext_fcmp_oeq_f32(float %a, float %b) {
1803 ; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_oeq_f32(
1804 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 14)
1805 ; CHECK-NEXT: ret i64 [[MASK]]
1807 %cmp = fcmp oeq float %a, %b
1808 %zext.cmp = zext i1 %cmp to i32
1809 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32)
; Outer compare-equal-to-0 over `fcmp ule`: the inner predicate is expected
; to be inverted, giving amdgcn.fcmp(a, b, 2) (presumably ogt).
1813 define i64 @fold_icmp_eq_0_zext_fcmp_ule_f32(float %a, float %b) {
1814 ; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ule_f32(
1815 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 2)
1816 ; CHECK-NEXT: ret i64 [[MASK]]
1818 %cmp = fcmp ule float %a, %b
1819 %zext.cmp = zext i1 %cmp to i32
1820 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32)
; Outer compare-equal-to-0 over `fcmp ogt`: the inner predicate is expected
; to be inverted, giving amdgcn.fcmp(a, b, 13) (presumably ule).
1824 define i64 @fold_icmp_eq_0_zext_fcmp_ogt_f32(float %a, float %b) {
1825 ; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ogt_f32(
1826 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 13)
1827 ; CHECK-NEXT: ret i64 [[MASK]]
1829 %cmp = fcmp ogt float %a, %b
1830 %zext.cmp = zext i1 %cmp to i32
1831 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32)
; zext(icmp eq a, b) compared equal to 1: expected to fold directly to
; amdgcn.icmp(a, b, 32), keeping the inner predicate.
1835 define i64 @fold_icmp_zext_icmp_eq_1_i32(i32 %a, i32 %b) {
1836 ; CHECK-LABEL: @fold_icmp_zext_icmp_eq_1_i32(
1837 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32)
1838 ; CHECK-NEXT: ret i64 [[MASK]]
1840 %cmp = icmp eq i32 %a, %b
1841 %zext.cmp = zext i1 %cmp to i32
1842 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 1, i32 32)
; The i1 comes from a function argument rather than a compare. Testing the
; zext for equality with 1 is expected to be rewritten as a code-33
; comparison against 0; the zext itself stays.
1846 define i64 @fold_icmp_zext_argi1_eq_1_i32(i1 %cond) {
1847 ; CHECK-LABEL: @fold_icmp_zext_argi1_eq_1_i32(
1848 ; CHECK-NEXT: [[ZEXT_COND:%.*]] = zext i1 [[COND:%.*]] to i32
1849 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_COND]], i32 0, i32 33)
1850 ; CHECK-NEXT: ret i64 [[MASK]]
1852 %zext.cond = zext i1 %cond to i32
1853 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cond, i32 1, i32 32)
; zext of an i1 argument compared equal to -1: per the CHECK lines no
; rewrite is expected and the call is left unchanged.
1857 define i64 @fold_icmp_zext_argi1_eq_neg1_i32(i1 %cond) {
1858 ; CHECK-LABEL: @fold_icmp_zext_argi1_eq_neg1_i32(
1859 ; CHECK-NEXT: [[ZEXT_COND:%.*]] = zext i1 [[COND:%.*]] to i32
1860 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_COND]], i32 -1, i32 32)
1861 ; CHECK-NEXT: ret i64 [[MASK]]
1863 %zext.cond = zext i1 %cond to i32
1864 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cond, i32 -1, i32 32)
; Input: sext of an i1 argument is passed to llvm.amdgcn.icmp together with 1 and code 32.
; The check lines expect the sext and the call (operand 1, code 32) to be left as written.
1868 define i64 @fold_icmp_sext_argi1_eq_1_i32(i1 %cond) {
1869 ; CHECK-LABEL: @fold_icmp_sext_argi1_eq_1_i32(
1870 ; CHECK-NEXT: [[SEXT_COND:%.*]] = sext i1 [[COND:%.*]] to i32
1871 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[SEXT_COND]], i32 1, i32 32)
1872 ; CHECK-NEXT: ret i64 [[MASK]]
1874 %sext.cond = sext i1 %cond to i32
1875 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cond, i32 1, i32 32)
; Input: sext of an i1 argument is passed to llvm.amdgcn.icmp together with -1 and code 32.
; The check lines expect the sext to stay and the call to be rewritten to operand 0 with code 33.
1879 define i64 @fold_icmp_sext_argi1_eq_neg1_i32(i1 %cond) {
1880 ; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i32(
1881 ; CHECK-NEXT: [[SEXT_COND:%.*]] = sext i1 [[COND:%.*]] to i32
1882 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[SEXT_COND]], i32 0, i32 33)
1883 ; CHECK-NEXT: ret i64 [[MASK]]
1885 %sext.cond = sext i1 %cond to i32
1886 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cond, i32 -1, i32 32)
; Input: an i1 argument sign-extended to i64 is passed to llvm.amdgcn.icmp.i64.i64 with -1 and code 32.
; The check lines expect the sext to stay and the call to be rewritten to operand 0 with code 33.
1890 define i64 @fold_icmp_sext_argi1_eq_neg1_i64(i1 %cond) {
1891 ; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i64(
1892 ; CHECK-NEXT: [[SEXT_COND:%.*]] = sext i1 [[COND:%.*]] to i64
1893 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[SEXT_COND]], i64 0, i32 33)
1894 ; CHECK-NEXT: ret i64 [[MASK]]
1896 %sext.cond = sext i1 %cond to i64
1897 %mask = call i64 @llvm.amdgcn.icmp.i64.i64(i64 %sext.cond, i64 -1, i32 32)
1901 ; TODO: Should be able to fold to false
; Input: sext(icmp eq %a, %b) is passed to llvm.amdgcn.icmp together with 1 and code 32.
; The check lines record the current output: the icmp, the sext and the call are all still present.
1902 define i64 @fold_icmp_sext_icmp_eq_1_i32(i32 %a, i32 %b) {
1903 ; CHECK-LABEL: @fold_icmp_sext_icmp_eq_1_i32(
1904 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]]
1905 ; CHECK-NEXT: [[SEXT_CMP:%.*]] = sext i1 [[CMP]] to i32
1906 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[SEXT_CMP]], i32 1, i32 32)
1907 ; CHECK-NEXT: ret i64 [[MASK]]
1909 %cmp = icmp eq i32 %a, %b
1910 %sext.cmp = sext i1 %cmp to i32
1911 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 1, i32 32)
; Input: sext(icmp eq %a, %b) is passed to llvm.amdgcn.icmp together with -1 and code 32.
; The check lines expect the sext to vanish: llvm.amdgcn.icmp is applied to %a, %b with code 32.
1915 define i64 @fold_icmp_sext_icmp_eq_neg1_i32(i32 %a, i32 %b) {
1916 ; CHECK-LABEL: @fold_icmp_sext_icmp_eq_neg1_i32(
1917 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32)
1918 ; CHECK-NEXT: ret i64 [[MASK]]
1920 %cmp = icmp eq i32 %a, %b
1921 %sext.cmp = sext i1 %cmp to i32
1922 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 -1, i32 32)
; Input: sext(icmp sge %a, %b) is passed to llvm.amdgcn.icmp together with -1 and code 32.
; The check lines expect llvm.amdgcn.icmp applied directly to %a, %b with code 39.
1926 define i64 @fold_icmp_sext_icmp_sge_neg1_i32(i32 %a, i32 %b) {
1927 ; CHECK-LABEL: @fold_icmp_sext_icmp_sge_neg1_i32(
1928 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 39)
1929 ; CHECK-NEXT: ret i64 [[MASK]]
1931 %cmp = icmp sge i32 %a, %b
1932 %sext.cmp = sext i1 %cmp to i32
1933 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 -1, i32 32)
; Input: icmp sle %a, %b is inverted with `xor ..., true`, zero-extended, and passed to
; llvm.amdgcn.icmp together with 0 and code 33.
; The check lines expect llvm.amdgcn.icmp applied directly to %a, %b with code 38.
1937 define i64 @fold_not_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) {
1938 ; CHECK-LABEL: @fold_not_icmp_ne_0_zext_icmp_sle_i32(
1939 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 38)
1940 ; CHECK-NEXT: ret i64 [[MASK]]
1942 %cmp = icmp sle i32 %a, %b
1943 %not = xor i1 %cmp, true
1944 %zext.cmp = zext i1 %not to i32
1945 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
; Input: zext(icmp eq on i4 operands) is passed to llvm.amdgcn.icmp together with 0 and code 33.
; The check lines expect both i4 operands zero-extended to i16 and llvm.amdgcn.icmp.i64.i16 with code 32.
1949 define i64 @fold_icmp_ne_0_zext_icmp_eq_i4(i4 %a, i4 %b) {
1950 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i4(
1951 ; CHECK-NEXT: [[TMP1:%.*]] = zext i4 [[A:%.*]] to i16
1952 ; CHECK-NEXT: [[TMP2:%.*]] = zext i4 [[B:%.*]] to i16
1953 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 32)
1954 ; CHECK-NEXT: ret i64 [[MASK]]
1956 %cmp = icmp eq i4 %a, %b
1957 %zext.cmp = zext i1 %cmp to i32
1958 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
; Input: zext(icmp eq on i8 operands) is passed to llvm.amdgcn.icmp together with 0 and code 33.
; The check lines expect both i8 operands zero-extended to i16 and llvm.amdgcn.icmp.i64.i16 with code 32.
1962 define i64 @fold_icmp_ne_0_zext_icmp_eq_i8(i8 %a, i8 %b) {
1963 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i8(
1964 ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
1965 ; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[B:%.*]] to i16
1966 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 32)
1967 ; CHECK-NEXT: ret i64 [[MASK]]
1969 %cmp = icmp eq i8 %a, %b
1970 %zext.cmp = zext i1 %cmp to i32
1971 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
; Input: zext(icmp eq on i16 operands) is passed to llvm.amdgcn.icmp together with 0 and code 33.
; The check lines expect llvm.amdgcn.icmp.i64.i16 applied directly to %a, %b with code 32 (no extension).
1975 define i64 @fold_icmp_ne_0_zext_icmp_eq_i16(i16 %a, i16 %b) {
1976 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i16(
1977 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 32)
1978 ; CHECK-NEXT: ret i64 [[MASK]]
1980 %cmp = icmp eq i16 %a, %b
1981 %zext.cmp = zext i1 %cmp to i32
1982 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
; Input: zext(icmp eq on i36 operands) is passed to llvm.amdgcn.icmp together with 0 and code 33.
; The check lines expect both i36 operands zero-extended to i64 and llvm.amdgcn.icmp.i64.i64 with code 32.
1986 define i64 @fold_icmp_ne_0_zext_icmp_eq_i36(i36 %a, i36 %b) {
1987 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i36(
1988 ; CHECK-NEXT: [[TMP1:%.*]] = zext i36 [[A:%.*]] to i64
1989 ; CHECK-NEXT: [[TMP2:%.*]] = zext i36 [[B:%.*]] to i64
1990 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[TMP1]], i64 [[TMP2]], i32 32)
1991 ; CHECK-NEXT: ret i64 [[MASK]]
1993 %cmp = icmp eq i36 %a, %b
1994 %zext.cmp = zext i1 %cmp to i32
1995 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
; Input: zext(icmp eq on i128 operands) is passed to llvm.amdgcn.icmp together with 0 and code 33.
; The check lines expect no rewrite for this width: icmp, zext and the call are all kept.
1999 define i64 @fold_icmp_ne_0_zext_icmp_eq_i128(i128 %a, i128 %b) {
2000 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i128(
2001 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i128 [[A:%.*]], [[B:%.*]]
2002 ; CHECK-NEXT: [[ZEXT_CMP:%.*]] = zext i1 [[CMP]] to i32
2003 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_CMP]], i32 0, i32 33)
2004 ; CHECK-NEXT: ret i64 [[MASK]]
2006 %cmp = icmp eq i128 %a, %b
2007 %zext.cmp = zext i1 %cmp to i32
2008 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
; Input: zext(fcmp oeq on half operands) is passed to llvm.amdgcn.icmp together with 0 and code 33.
; The check lines expect a single llvm.amdgcn.fcmp.i64.f16 call on %a, %b with code 1.
2012 define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f16(half %a, half %b) {
2013 ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f16(
2014 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f16(half [[A:%.*]], half [[B:%.*]], i32 1)
2015 ; CHECK-NEXT: ret i64 [[MASK]]
2017 %cmp = fcmp oeq half %a, %b
2018 %zext.cmp = zext i1 %cmp to i32
2019 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
; Input: zext(fcmp oeq on fp128 operands) is passed to llvm.amdgcn.icmp together with 0 and code 33.
; The check lines expect no rewrite for this type: fcmp, zext and the call are all kept.
2023 define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f128(fp128 %a, fp128 %b) {
2024 ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f128(
2025 ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq fp128 [[A:%.*]], [[B:%.*]]
2026 ; CHECK-NEXT: [[ZEXT_CMP:%.*]] = zext i1 [[CMP]] to i32
2027 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_CMP]], i32 0, i32 33)
2028 ; CHECK-NEXT: ret i64 [[MASK]]
2030 %cmp = fcmp oeq fp128 %a, %b
2031 %zext.cmp = zext i1 %cmp to i32
2032 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
; Input: zext(icmp slt on i4 operands) is passed to llvm.amdgcn.icmp together with 0 and code 33.
; The check lines expect both operands sign-extended to i16 and llvm.amdgcn.icmp.i64.i16 with code 40.
2036 define i64 @fold_icmp_ne_0_zext_icmp_slt_i4(i4 %a, i4 %b) {
2037 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i4(
2038 ; CHECK-NEXT: [[TMP1:%.*]] = sext i4 [[A:%.*]] to i16
2039 ; CHECK-NEXT: [[TMP2:%.*]] = sext i4 [[B:%.*]] to i16
2040 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 40)
2041 ; CHECK-NEXT: ret i64 [[MASK]]
2043 %cmp = icmp slt i4 %a, %b
2044 %zext.cmp = zext i1 %cmp to i32
2045 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
; Input: zext(icmp slt on i8 operands) is passed to llvm.amdgcn.icmp together with 0 and code 33.
; The check lines expect both operands sign-extended to i16 and llvm.amdgcn.icmp.i64.i16 with code 40.
2049 define i64 @fold_icmp_ne_0_zext_icmp_slt_i8(i8 %a, i8 %b) {
2050 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i8(
2051 ; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[A:%.*]] to i16
2052 ; CHECK-NEXT: [[TMP2:%.*]] = sext i8 [[B:%.*]] to i16
2053 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 40)
2054 ; CHECK-NEXT: ret i64 [[MASK]]
2056 %cmp = icmp slt i8 %a, %b
2057 %zext.cmp = zext i1 %cmp to i32
2058 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
; Input: zext(icmp slt on i16 operands) is passed to llvm.amdgcn.icmp together with 0 and code 33.
; The check lines expect llvm.amdgcn.icmp.i64.i16 applied directly to %a, %b with code 40 (no extension).
2062 define i64 @fold_icmp_ne_0_zext_icmp_slt_i16(i16 %a, i16 %b) {
2063 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i16(
2064 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 40)
2065 ; CHECK-NEXT: ret i64 [[MASK]]
2067 %cmp = icmp slt i16 %a, %b
2068 %zext.cmp = zext i1 %cmp to i32
2069 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
; Input: zext(icmp ult on i4 operands) is passed to llvm.amdgcn.icmp together with 0 and code 33.
; The check lines expect both operands zero-extended to i16 and llvm.amdgcn.icmp.i64.i16 with code 36.
2073 define i64 @fold_icmp_ne_0_zext_icmp_ult_i4(i4 %a, i4 %b) {
2074 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i4(
2075 ; CHECK-NEXT: [[TMP1:%.*]] = zext i4 [[A:%.*]] to i16
2076 ; CHECK-NEXT: [[TMP2:%.*]] = zext i4 [[B:%.*]] to i16
2077 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 36)
2078 ; CHECK-NEXT: ret i64 [[MASK]]
2080 %cmp = icmp ult i4 %a, %b
2081 %zext.cmp = zext i1 %cmp to i32
2082 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
; Input: zext(icmp ult on i8 operands) is passed to llvm.amdgcn.icmp together with 0 and code 33.
; The check lines expect both operands zero-extended to i16 and llvm.amdgcn.icmp.i64.i16 with code 36.
2086 define i64 @fold_icmp_ne_0_zext_icmp_ult_i8(i8 %a, i8 %b) {
2087 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i8(
2088 ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
2089 ; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[B:%.*]] to i16
2090 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 36)
2091 ; CHECK-NEXT: ret i64 [[MASK]]
2093 %cmp = icmp ult i8 %a, %b
2094 %zext.cmp = zext i1 %cmp to i32
2095 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
; Input: zext(icmp ult on i16 operands) is passed to llvm.amdgcn.icmp together with 0 and code 33.
; The check lines expect llvm.amdgcn.icmp.i64.i16 applied directly to %a, %b with code 36 (no extension).
2099 define i64 @fold_icmp_ne_0_zext_icmp_ult_i16(i16 %a, i16 %b) {
2100 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i16(
2101 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 36)
2102 ; CHECK-NEXT: ret i64 [[MASK]]
2104 %cmp = icmp ult i16 %a, %b
2105 %zext.cmp = zext i1 %cmp to i32
2106 %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
2110 ; 1-bit NE comparisons
; Input: the i1 result of icmp eq (i32 operands) is passed straight to llvm.amdgcn.icmp.i64.i1
; together with false and code 33. The check lines expect the compare and the call to stay unchanged.
2112 define i64 @fold_icmp_i1_ne_0_icmp_eq_i1(i32 %a, i32 %b) {
2113 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i1(
2114 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]]
2115 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2116 ; CHECK-NEXT: ret i64 [[MASK]]
2118 %cmp = icmp eq i32 %a, %b
2119 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
; Input: the i1 result of icmp ne (i32 operands) is passed straight to llvm.amdgcn.icmp.i64.i1
; together with false and code 33. The check lines expect the compare and the call to stay unchanged.
2123 define i64 @fold_icmp_i1_ne_0_icmp_ne_i1(i32 %a, i32 %b) {
2124 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ne_i1(
2125 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], [[B:%.*]]
2126 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2127 ; CHECK-NEXT: ret i64 [[MASK]]
2129 %cmp = icmp ne i32 %a, %b
2130 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
; Input: the i1 result of icmp sle (i32 operands) is passed straight to llvm.amdgcn.icmp.i64.i1
; together with false and code 33. The check lines expect the compare and the call to stay unchanged.
2134 define i64 @fold_icmp_i1_ne_0_icmp_sle_i1(i32 %a, i32 %b) {
2135 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_sle_i1(
2136 ; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[A:%.*]], [[B:%.*]]
2137 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2138 ; CHECK-NEXT: ret i64 [[MASK]]
2140 %cmp = icmp sle i32 %a, %b
2141 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
; Input: the i1 result of icmp ugt (i64 operands) is passed straight to llvm.amdgcn.icmp.i64.i1
; together with false and code 33. The check lines expect the compare and the call to stay unchanged.
2145 define i64 @fold_icmp_i1_ne_0_icmp_ugt_i64(i64 %a, i64 %b) {
2146 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ugt_i64(
2147 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
2148 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2149 ; CHECK-NEXT: ret i64 [[MASK]]
2151 %cmp = icmp ugt i64 %a, %b
2152 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
; Input: the constant false is the FIRST operand and the icmp result the second (code 33).
; The check lines expect the operands swapped so the constant ends up on the right; code stays 33.
; NOTE(review): the function name says "ult" but the visible compare is `icmp ugt` - confirm the
; name is intentional before relying on it.
2156 define i64 @fold_icmp_i1_ne_0_icmp_ult_swap_i64(i64 %a, i64 %b) {
2157 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_swap_i64(
2158 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
2159 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2160 ; CHECK-NEXT: ret i64 [[MASK]]
2162 %cmp = icmp ugt i64 %a, %b
2163 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 false, i1 %cmp, i32 33)
; Input: the i1 result of fcmp oeq (float operands) is passed straight to llvm.amdgcn.icmp.i64.i1
; together with false and code 33. The check lines expect the compare and the call to stay unchanged.
2167 define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f32(float %a, float %b) {
2168 ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f32(
2169 ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[A:%.*]], [[B:%.*]]
2170 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2171 ; CHECK-NEXT: ret i64 [[MASK]]
2173 %cmp = fcmp oeq float %a, %b
2174 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
; Input: the i1 result of fcmp une (float operands) is passed straight to llvm.amdgcn.icmp.i64.i1
; together with false and code 33. The check lines expect the compare and the call to stay unchanged.
2178 define i64 @fold_icmp_i1_ne_0_fcmp_une_f32(float %a, float %b) {
2179 ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_une_f32(
2180 ; CHECK-NEXT: [[CMP:%.*]] = fcmp une float [[A:%.*]], [[B:%.*]]
2181 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2182 ; CHECK-NEXT: ret i64 [[MASK]]
2184 %cmp = fcmp une float %a, %b
2185 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
; Input: the i1 result of fcmp olt (double operands) is passed straight to llvm.amdgcn.icmp.i64.i1
; together with false and code 33. The check lines expect the compare and the call to stay unchanged.
2189 define i64 @fold_icmp_i1_ne_0_fcmp_olt_f64(double %a, double %b) {
2190 ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_olt_f64(
2191 ; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
2192 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2193 ; CHECK-NEXT: ret i64 [[MASK]]
2195 %cmp = fcmp olt double %a, %b
2196 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
; Input: the i1 result of icmp eq (i4 operands) is passed straight to llvm.amdgcn.icmp.i64.i1
; together with false and code 33. The check lines expect the compare and the call to stay unchanged.
2200 define i64 @fold_icmp_i1_ne_0_icmp_eq_i4(i4 %a, i4 %b) {
2201 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i4(
2202 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i4 [[A:%.*]], [[B:%.*]]
2203 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2204 ; CHECK-NEXT: ret i64 [[MASK]]
2206 %cmp = icmp eq i4 %a, %b
2207 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
; Input: the i1 result of icmp eq (i8 operands) is passed straight to llvm.amdgcn.icmp.i64.i1
; together with false and code 33. The check lines expect the compare and the call to stay unchanged.
2211 define i64 @fold_icmp_i1_ne_0_icmp_eq_i8(i8 %a, i8 %b) {
2212 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i8(
2213 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[A:%.*]], [[B:%.*]]
2214 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2215 ; CHECK-NEXT: ret i64 [[MASK]]
2217 %cmp = icmp eq i8 %a, %b
2218 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
; Input: the i1 result of icmp eq (i16 operands) is passed straight to llvm.amdgcn.icmp.i64.i1
; together with false and code 33. The check lines expect the compare and the call to stay unchanged.
2222 define i64 @fold_icmp_i1_ne_0_icmp_eq_i16(i16 %a, i16 %b) {
2223 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i16(
2224 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[A:%.*]], [[B:%.*]]
2225 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2226 ; CHECK-NEXT: ret i64 [[MASK]]
2228 %cmp = icmp eq i16 %a, %b
2229 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
; Input: the i1 result of icmp eq (i36 operands) is passed straight to llvm.amdgcn.icmp.i64.i1
; together with false and code 33. The check lines expect the compare and the call to stay unchanged.
2233 define i64 @fold_icmp_i1_ne_0_icmp_eq_i36(i36 %a, i36 %b) {
2234 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i36(
2235 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i36 [[A:%.*]], [[B:%.*]]
2236 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2237 ; CHECK-NEXT: ret i64 [[MASK]]
2239 %cmp = icmp eq i36 %a, %b
2240 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
; Input: the i1 result of icmp eq (i128 operands) is passed straight to llvm.amdgcn.icmp.i64.i1
; together with false and code 33. The check lines expect the compare and the call to stay unchanged.
2244 define i64 @fold_icmp_i1_ne_0_icmp_eq_i128(i128 %a, i128 %b) {
2245 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i128(
2246 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i128 [[A:%.*]], [[B:%.*]]
2247 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2248 ; CHECK-NEXT: ret i64 [[MASK]]
2250 %cmp = icmp eq i128 %a, %b
2251 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
; Input: the i1 result of fcmp oeq (half operands) is passed straight to llvm.amdgcn.icmp.i64.i1
; together with false and code 33. The check lines expect the compare and the call to stay unchanged.
2255 define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f16(half %a, half %b) {
2256 ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f16(
2257 ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq half [[A:%.*]], [[B:%.*]]
2258 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2259 ; CHECK-NEXT: ret i64 [[MASK]]
2261 %cmp = fcmp oeq half %a, %b
2262 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
; Input: the i1 result of fcmp oeq (fp128 operands) is passed straight to llvm.amdgcn.icmp.i64.i1
; together with false and code 33. The check lines expect the compare and the call to stay unchanged.
2266 define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f128(fp128 %a, fp128 %b) {
2267 ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f128(
2268 ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq fp128 [[A:%.*]], [[B:%.*]]
2269 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2270 ; CHECK-NEXT: ret i64 [[MASK]]
2272 %cmp = fcmp oeq fp128 %a, %b
2273 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
; Input: the i1 result of icmp slt (i4 operands) is passed straight to llvm.amdgcn.icmp.i64.i1
; together with false and code 33. The check lines expect the compare and the call to stay unchanged.
2277 define i64 @fold_icmp_i1_ne_0_icmp_slt_i4(i4 %a, i4 %b) {
2278 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i4(
2279 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i4 [[A:%.*]], [[B:%.*]]
2280 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2281 ; CHECK-NEXT: ret i64 [[MASK]]
2283 %cmp = icmp slt i4 %a, %b
2284 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
; Input: the i1 result of icmp slt (i8 operands) is passed straight to llvm.amdgcn.icmp.i64.i1
; together with false and code 33. The check lines expect the compare and the call to stay unchanged.
2288 define i64 @fold_icmp_i1_ne_0_icmp_slt_i8(i8 %a, i8 %b) {
2289 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i8(
2290 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[A:%.*]], [[B:%.*]]
2291 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2292 ; CHECK-NEXT: ret i64 [[MASK]]
2294 %cmp = icmp slt i8 %a, %b
2295 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
; Input: the i1 result of icmp slt (i16 operands) is passed straight to llvm.amdgcn.icmp.i64.i1
; together with false and code 33. The check lines expect the compare and the call to stay unchanged.
2299 define i64 @fold_icmp_i1_ne_0_icmp_slt_i16(i16 %a, i16 %b) {
2300 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i16(
2301 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[A:%.*]], [[B:%.*]]
2302 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2303 ; CHECK-NEXT: ret i64 [[MASK]]
2305 %cmp = icmp slt i16 %a, %b
2306 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
; Input: the i1 result of icmp ult (i4 operands) is passed straight to llvm.amdgcn.icmp.i64.i1
; together with false and code 33. The check lines expect the compare and the call to stay unchanged.
2310 define i64 @fold_icmp_i1_ne_0_icmp_ult_i4(i4 %a, i4 %b) {
2311 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i4(
2312 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i4 [[A:%.*]], [[B:%.*]]
2313 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2314 ; CHECK-NEXT: ret i64 [[MASK]]
2316 %cmp = icmp ult i4 %a, %b
2317 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
; Input: the i1 result of icmp ult (i8 operands) is passed straight to llvm.amdgcn.icmp.i64.i1
; together with false and code 33. The check lines expect the compare and the call to stay unchanged.
2321 define i64 @fold_icmp_i1_ne_0_icmp_ult_i8(i8 %a, i8 %b) {
2322 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i8(
2323 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[A:%.*]], [[B:%.*]]
2324 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2325 ; CHECK-NEXT: ret i64 [[MASK]]
2327 %cmp = icmp ult i8 %a, %b
2328 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
; Input: the i1 result of icmp ult (i16 operands) is passed straight to llvm.amdgcn.icmp.i64.i1
; together with false and code 33. The check lines expect the compare and the call to stay unchanged.
2332 define i64 @fold_icmp_i1_ne_0_icmp_ult_i16(i16 %a, i16 %b) {
2333 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i16(
2334 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[A:%.*]], [[B:%.*]]
2335 ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
2336 ; CHECK-NEXT: ret i64 [[MASK]]
2338 %cmp = icmp ult i16 %a, %b
2339 %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
2343 ; --------------------------------------------------------------------
; llvm.amdgcn.fcmp
2345 ; --------------------------------------------------------------------
2347 declare i64 @llvm.amdgcn.fcmp.i64.f32(float, float, i32 immarg) nounwind readnone convergent
; Input: two llvm.amdgcn.fcmp calls on the same operands with codes -1 and 16, OR-ed together.
; The check lines expect both calls and the `or` to be left in place.
2349 define i64 @invalid_fcmp_code(float %a, float %b) {
2350 ; CHECK-LABEL: @invalid_fcmp_code(
2351 ; CHECK-NEXT: [[UNDER:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 -1)
2352 ; CHECK-NEXT: [[OVER:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A]], float [[B]], i32 16)
2353 ; CHECK-NEXT: [[OR:%.*]] = or i64 [[UNDER]], [[OVER]]
2354 ; CHECK-NEXT: ret i64 [[OR]]
2356 %under = call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 -1)
2357 %over = call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 16)
2358 %or = or i64 %under, %over
; Input: llvm.amdgcn.fcmp on the constants 2.0 and 4.0 with code 1.
; The check lines expect the call to be replaced by the constant result `ret i64 0`.
2362 define i64 @fcmp_constant_inputs_false() {
2363 ; CHECK-LABEL: @fcmp_constant_inputs_false(
2364 ; CHECK-NEXT: ret i64 0
2366 %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 1)
; Input: llvm.amdgcn.fcmp on the constants 2.0 and 4.0 with code 4.
; The check lines expect the call to become llvm.read_register.i64 on metadata !0.
; NOTE(review): the register named by !0 is not visible in this part of the file.
2370 define i64 @fcmp_constant_inputs_true() {
2371 ; CHECK-LABEL: @fcmp_constant_inputs_true(
2372 ; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata !0) [[ATTR12]]
2373 ; CHECK-NEXT: ret i64 [[RESULT]]
2375 %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4)
; Input: llvm.amdgcn.fcmp with the constant 4.0 as FIRST operand, %x second, code 4.
; The check lines expect the operands swapped (%x first, 4.0 second) and the code changed to 2.
2379 define i64 @fcmp_constant_to_rhs_olt(float %x) {
2380 ; CHECK-LABEL: @fcmp_constant_to_rhs_olt(
2381 ; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[X:%.*]], float 4.000000e+00, i32 2)
2382 ; CHECK-NEXT: ret i64 [[RESULT]]
2384 %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 4.0, float %x, i32 4)
2388 ; --------------------------------------------------------------------
2389 ; llvm.amdgcn.ballot
2390 ; --------------------------------------------------------------------
2392 declare i64 @llvm.amdgcn.ballot.i64(i1) nounwind readnone convergent
2393 declare i32 @llvm.amdgcn.ballot.i32(i1) nounwind readnone convergent
; Input: llvm.amdgcn.ballot.i64 on a non-constant i1 argument.
; The check lines expect the call to be left unchanged.
2395 define i64 @ballot_nocombine_64(i1 %i) {
2396 ; CHECK-LABEL: @ballot_nocombine_64(
2397 ; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[I:%.*]])
2398 ; CHECK-NEXT: ret i64 [[B]]
2400 %b = call i64 @llvm.amdgcn.ballot.i64(i1 %i)
; Input: llvm.amdgcn.ballot.i64 on the constant `i1 0`.
; The check lines expect the call to be replaced by `ret i64 0`.
2404 define i64 @ballot_zero_64() {
2405 ; CHECK-LABEL: @ballot_zero_64(
2406 ; CHECK-NEXT: ret i64 0
2408 %b = call i64 @llvm.amdgcn.ballot.i64(i1 0)
; Input: llvm.amdgcn.ballot.i64 on the constant `i1 1`.
; The check lines expect the call to become llvm.read_register.i64 on metadata !0.
; NOTE(review): the register named by !0 is not visible in this part of the file.
2412 define i64 @ballot_one_64() {
2413 ; CHECK-LABEL: @ballot_one_64(
2414 ; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.read_register.i64(metadata !0) [[ATTR12]]
2415 ; CHECK-NEXT: ret i64 [[B]]
2417 %b = call i64 @llvm.amdgcn.ballot.i64(i1 1)
; Input: llvm.amdgcn.ballot.i32 on a non-constant i1 argument.
; The check lines expect the call to be left unchanged.
2421 define i32 @ballot_nocombine_32(i1 %i) {
2422 ; CHECK-LABEL: @ballot_nocombine_32(
2423 ; CHECK-NEXT: [[B:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[I:%.*]])
2424 ; CHECK-NEXT: ret i32 [[B]]
2426 %b = call i32 @llvm.amdgcn.ballot.i32(i1 %i)
; Input: llvm.amdgcn.ballot.i32 on the constant `i1 0`.
; The check lines expect the call to be replaced by `ret i32 0`.
2430 define i32 @ballot_zero_32() {
2431 ; CHECK-LABEL: @ballot_zero_32(
2432 ; CHECK-NEXT: ret i32 0
2434 %b = call i32 @llvm.amdgcn.ballot.i32(i1 0)
; Input: llvm.amdgcn.ballot.i32 on the constant `i1 1`.
; The check lines expect the call to become llvm.read_register.i32 on metadata !1.
; NOTE(review): the register named by !1 is not visible in this part of the file.
2438 define i32 @ballot_one_32() {
2439 ; CHECK-LABEL: @ballot_one_32(
2440 ; CHECK-NEXT: [[B:%.*]] = call i32 @llvm.read_register.i32(metadata !1) [[ATTR12]]
2441 ; CHECK-NEXT: ret i32 [[B]]
2443 %b = call i32 @llvm.amdgcn.ballot.i32(i1 1)
2447 ; --------------------------------------------------------------------
2448 ; llvm.amdgcn.wqm.vote
2449 ; --------------------------------------------------------------------
2451 declare i1 @llvm.amdgcn.wqm.vote(i1)
; Input: llvm.amdgcn.wqm.vote(true) drives a select between 1.0 and 0.0.
; The check lines expect the whole body to reduce to `ret float 1.0` under the main_body label.
2453 define float @wqm_vote_true() {
2454 ; CHECK-LABEL: @wqm_vote_true(
2455 ; CHECK-NEXT: main_body:
2456 ; CHECK-NEXT: ret float 1.000000e+00
2459 %w = call i1 @llvm.amdgcn.wqm.vote(i1 true)
2460 %r = select i1 %w, float 1.0, float 0.0
; Input: llvm.amdgcn.wqm.vote(false) drives a select between 1.0 and 0.0.
; The check lines expect the whole body to reduce to `ret float 0.0` under the main_body label.
2464 define float @wqm_vote_false() {
2465 ; CHECK-LABEL: @wqm_vote_false(
2466 ; CHECK-NEXT: main_body:
2467 ; CHECK-NEXT: ret float 0.000000e+00
2470 %w = call i1 @llvm.amdgcn.wqm.vote(i1 false)
2471 %r = select i1 %w, float 1.0, float 0.0
; Input: llvm.amdgcn.wqm.vote(undef) drives a select between 1.0 and 0.0.
; The check lines expect the whole body to reduce to `ret float 0.0` under the main_body label.
2475 define float @wqm_vote_undef() {
2476 ; CHECK-LABEL: @wqm_vote_undef(
2477 ; CHECK-NEXT: main_body:
2478 ; CHECK-NEXT: ret float 0.000000e+00
2481 %w = call i1 @llvm.amdgcn.wqm.vote(i1 undef)
2482 %r = select i1 %w, float 1.0, float 0.0
2486 ; --------------------------------------------------------------------
; llvm.amdgcn.kill
2488 ; --------------------------------------------------------------------
2490 declare void @llvm.amdgcn.kill(i1)
; Input: llvm.amdgcn.kill called with the constant `i1 true`.
; The check lines expect the call to be removed, leaving only `ret void`.
2492 define void @kill_true() {
2493 ; CHECK-LABEL: @kill_true(
2494 ; CHECK-NEXT: ret void
2496 call void @llvm.amdgcn.kill(i1 true)
2500 ; --------------------------------------------------------------------
2501 ; llvm.amdgcn.readfirstlane
2502 ; --------------------------------------------------------------------
2504 declare i32 @llvm.amdgcn.readfirstlane(i32)
2506 @gv = constant i32 0
; Input: llvm.amdgcn.readfirstlane applied to a function argument, 0, 123, a ptrtoint constant
; expression of @gv, and undef; each result is written with a volatile store.
; The check lines expect only the call on the argument to remain; the other four stores write the
; constant operand itself (0, 123, the ptrtoint expression, undef).
2508 define amdgpu_kernel void @readfirstlane_constant(i32 %arg) {
2509 ; CHECK-LABEL: @readfirstlane_constant(
2510 ; CHECK-NEXT: [[VAR:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
2511 ; CHECK-NEXT: store volatile i32 [[VAR]], i32* undef, align 4
2512 ; CHECK-NEXT: store volatile i32 0, i32* undef, align 4
2513 ; CHECK-NEXT: store volatile i32 123, i32* undef, align 4
2514 ; CHECK-NEXT: store volatile i32 ptrtoint (i32* @gv to i32), i32* undef, align 4
2515 ; CHECK-NEXT: store volatile i32 undef, i32* undef, align 4
2516 ; CHECK-NEXT: ret void
2518 %var = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
2519 %zero = call i32 @llvm.amdgcn.readfirstlane(i32 0)
2520 %imm = call i32 @llvm.amdgcn.readfirstlane(i32 123)
2521 %constexpr = call i32 @llvm.amdgcn.readfirstlane(i32 ptrtoint (i32* @gv to i32))
2522 %undef = call i32 @llvm.amdgcn.readfirstlane(i32 undef)
2523 store volatile i32 %var, i32* undef
2524 store volatile i32 %zero, i32* undef
2525 store volatile i32 %imm, i32* undef
2526 store volatile i32 %constexpr, i32* undef
2527 store volatile i32 %undef, i32* undef
; Input: three llvm.amdgcn.readfirstlane calls chained, each consuming the previous result.
; The check lines expect a single call on the argument, whose result is returned.
2531 define i32 @readfirstlane_idempotent(i32 %arg) {
2532 ; CHECK-LABEL: @readfirstlane_idempotent(
2533 ; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
2534 ; CHECK-NEXT: ret i32 [[READ0]]
2536 %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
2537 %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
2538 %read2 = call i32 @llvm.amdgcn.readfirstlane(i32 %read1)
; Input: a readfirstlane result is fed to llvm.amdgcn.readlane with lane operand 0.
; The check lines expect only the readfirstlane call to remain, with its result returned.
2542 define i32 @readfirstlane_readlane(i32 %arg) {
2543 ; CHECK-LABEL: @readfirstlane_readlane(
2544 ; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
2545 ; CHECK-NEXT: ret i32 [[READ0]]
2547 %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
2548 %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
; Input: a readfirstlane result is fed to a second readfirstlane.
; The check lines expect BOTH calls to be kept, with a `br label` between them (the two calls sit
; on opposite sides of a branch in the expected output).
2552 define i32 @readfirstlane_readfirstlane_different_block(i32 %arg) {
2553 ; CHECK-LABEL: @readfirstlane_readfirstlane_different_block(
2555 ; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
2556 ; CHECK-NEXT: br label [[BB1:%.*]]
2558 ; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[READ0]])
2559 ; CHECK-NEXT: ret i32 [[READ1]]
2562 %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
2566 %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
; Input: a readlane result (lane operand 0) is fed to readfirstlane.
; The check lines expect BOTH calls to be kept, with a `br label` between them (the two calls sit
; on opposite sides of a branch in the expected output).
2570 define i32 @readfirstlane_readlane_different_block(i32 %arg) {
2571 ; CHECK-LABEL: @readfirstlane_readlane_different_block(
2573 ; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 0)
2574 ; CHECK-NEXT: br label [[BB1:%.*]]
2576 ; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[READ0]])
2577 ; CHECK-NEXT: ret i32 [[READ1]]
2580 %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 0)
2584 %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
2588 ; --------------------------------------------------------------------
2589 ; llvm.amdgcn.readlane
2590 ; --------------------------------------------------------------------
2592 declare i32 @llvm.amdgcn.readlane(i32, i32)
; Input: llvm.amdgcn.readlane applied to a function argument (lane 7) and, with a variable lane,
; to 0, 123, a ptrtoint constant expression of @gv, and undef; each result is stored volatile.
; The check lines expect only the call on the argument to remain; the other four stores write the
; constant value operand itself (0, 123, the ptrtoint expression, undef).
2594 define amdgpu_kernel void @readlane_constant(i32 %arg, i32 %lane) {
2595 ; CHECK-LABEL: @readlane_constant(
2596 ; CHECK-NEXT: [[VAR:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 7)
2597 ; CHECK-NEXT: store volatile i32 [[VAR]], i32* undef, align 4
2598 ; CHECK-NEXT: store volatile i32 0, i32* undef, align 4
2599 ; CHECK-NEXT: store volatile i32 123, i32* undef, align 4
2600 ; CHECK-NEXT: store volatile i32 ptrtoint (i32* @gv to i32), i32* undef, align 4
2601 ; CHECK-NEXT: store volatile i32 undef, i32* undef, align 4
2602 ; CHECK-NEXT: ret void
2604 %var = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 7)
2605 %zero = call i32 @llvm.amdgcn.readlane(i32 0, i32 %lane)
2606 %imm = call i32 @llvm.amdgcn.readlane(i32 123, i32 %lane)
2607 %constexpr = call i32 @llvm.amdgcn.readlane(i32 ptrtoint (i32* @gv to i32), i32 %lane)
2608 %undef = call i32 @llvm.amdgcn.readlane(i32 undef, i32 %lane)
2609 store volatile i32 %var, i32* undef
2610 store volatile i32 %zero, i32* undef
2611 store volatile i32 %imm, i32* undef
2612 store volatile i32 %constexpr, i32* undef
2613 store volatile i32 %undef, i32* undef
; Input: a readlane result is fed to a second readlane that uses the same %lane operand.
; The check lines expect a single readlane call, whose result is returned.
2617 define i32 @readlane_idempotent(i32 %arg, i32 %lane) {
2618 ; CHECK-LABEL: @readlane_idempotent(
2619 ; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE:%.*]])
2620 ; CHECK-NEXT: ret i32 [[READ0]]
2622 %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane)
2623 %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane)
; Input: a readlane result is fed to a second readlane that uses a different lane operand
; (%lane0 then %lane1). The check lines expect both calls to be kept.
2627 define i32 @readlane_idempotent_different_lanes(i32 %arg, i32 %lane0, i32 %lane1) {
2628 ; CHECK-LABEL: @readlane_idempotent_different_lanes(
2629 ; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE0:%.*]])
2630 ; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 [[LANE1:%.*]])
2631 ; CHECK-NEXT: ret i32 [[READ1]]
2633 %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane0)
2634 %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane1)
; Input: a readfirstlane result is fed to llvm.amdgcn.readlane with lane operand 0.
; The check lines expect only the readfirstlane call to remain, with its result returned.
2638 define i32 @readlane_readfirstlane(i32 %arg) {
2639 ; CHECK-LABEL: @readlane_readfirstlane(
2640 ; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
2641 ; CHECK-NEXT: ret i32 [[READ0]]
2643 %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
2644 %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
; Input: a readlane result is fed to a second readlane that uses the same %lane operand.
; The check lines expect BOTH calls to be kept, with a `br label` between them (the two calls sit
; on opposite sides of a branch in the expected output).
2648 define i32 @readlane_idempotent_different_block(i32 %arg, i32 %lane) {
2649 ; CHECK-LABEL: @readlane_idempotent_different_block(
2651 ; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE:%.*]])
2652 ; CHECK-NEXT: br label [[BB1:%.*]]
2654 ; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 [[LANE]])
2655 ; CHECK-NEXT: ret i32 [[READ1]]
2658 %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane)
2662 %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane)
; Input: a readfirstlane result is fed to readlane with lane operand 0.
; The check lines expect BOTH calls to be kept, with a `br label` between them (the two calls sit
; on opposite sides of a branch in the expected output).
2667 define i32 @readlane_readfirstlane_different_block(i32 %arg) {
2668 ; CHECK-LABEL: @readlane_readfirstlane_different_block(
2670 ; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
2671 ; CHECK-NEXT: br label [[BB1:%.*]]
2673 ; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 0)
2674 ; CHECK-NEXT: ret i32 [[READ1]]
2677 %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
2681 %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
2685 ; --------------------------------------------------------------------
2686 ; llvm.amdgcn.update.dpp.i32
2687 ; --------------------------------------------------------------------
2689 declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1)
; Input: llvm.amdgcn.update.dpp.i32 with trailing operands 1, 1, 1 and `i1 0`; result is stored to %out.
; The check lines expect the call to keep both %in1 and %in2 (the `i1 0` is printed as `i1 false`).
2691 define amdgpu_kernel void @update_dpp_no_combine(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
2692 ; CHECK-LABEL: @update_dpp_no_combine(
2693 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 [[IN1:%.*]], i32 [[IN2:%.*]], i32 1, i32 1, i32 1, i1 false)
2694 ; CHECK-NEXT: store i32 [[TMP0]], i32 addrspace(1)* [[OUT:%.*]], align 4
2695 ; CHECK-NEXT: ret void
2697 %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 0)
2698 store i32 %tmp0, i32 addrspace(1)* %out
; Input: llvm.amdgcn.update.dpp.i32 with trailing operands 3, 15, 15 and `i1 1`; result is stored to %out.
; The check lines expect the first operand (%in1) to be replaced by undef; the rest is unchanged.
2702 define amdgpu_kernel void @update_dpp_drop_old(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
2703 ; CHECK-LABEL: @update_dpp_drop_old(
2704 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[IN2:%.*]], i32 3, i32 15, i32 15, i1 true)
2705 ; CHECK-NEXT: store i32 [[TMP0]], i32 addrspace(1)* [[OUT:%.*]], align 4
2706 ; CHECK-NEXT: ret void
2708 %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 3, i32 15, i32 15, i1 1)
2709 store i32 %tmp0, i32 addrspace(1)* %out
; Input: llvm.amdgcn.update.dpp.i32 whose first operand is already undef (trailing operands 4, 15, 15, `i1 1`).
; The check lines expect the call to be left as written.
2713 define amdgpu_kernel void @update_dpp_undef_old(i32 addrspace(1)* %out, i32 %in1) {
2714 ; CHECK-LABEL: @update_dpp_undef_old(
2715 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[IN1:%.*]], i32 4, i32 15, i32 15, i1 true)
2716 ; CHECK-NEXT: store i32 [[TMP0]], i32 addrspace(1)* [[OUT:%.*]], align 4
2717 ; CHECK-NEXT: ret void
2719 %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in1, i32 4, i32 15, i32 15, i1 1)
2720 store i32 %tmp0, i32 addrspace(1)* %out
2725 ; --------------------------------------------------------------------
2726 ; llvm.amdgcn.permlane16
2727 ; --------------------------------------------------------------------
2729 declare i32 @llvm.amdgcn.permlane16(i32, i32, i32, i32, i1 immarg, i1 immarg)
; permlane16 with both trailing i1 immediates false: the CHECK lines expect
; the constant first operand (12345) to be kept and the call left unchanged.
2731 define amdgpu_kernel void @permlane16(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
2732 ; CHECK-LABEL: @permlane16(
2733 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 false)
2734 ; CHECK-NEXT: store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
2735 ; CHECK-NEXT: ret void
2737 %res = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
2738 store i32 %res, i32 addrspace(1)* %out
; permlane16 with the trailing i1 immediates (false, true): the CHECK lines
; expect the constant first operand (12345) to be replaced with undef.
2742 define amdgpu_kernel void @permlane16_bound_ctrl(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
2743 ; CHECK-LABEL: @permlane16_bound_ctrl(
2744 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.amdgcn.permlane16(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 true)
2745 ; CHECK-NEXT: store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
2746 ; CHECK-NEXT: ret void
2748 %res = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 true)
2749 store i32 %res, i32 addrspace(1)* %out
; permlane16 with both trailing i1 immediates true: the CHECK lines expect
; the constant first operand (12345) to be replaced with undef.
2753 define amdgpu_kernel void @permlane16_fetch_invalid_bound_ctrl(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
2754 ; CHECK-LABEL: @permlane16_fetch_invalid_bound_ctrl(
2755 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.amdgcn.permlane16(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 true, i1 true)
2756 ; CHECK-NEXT: store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
2757 ; CHECK-NEXT: ret void
2759 %res = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 true)
2760 store i32 %res, i32 addrspace(1)* %out
2764 ; --------------------------------------------------------------------
2765 ; llvm.amdgcn.permlanex16
2766 ; --------------------------------------------------------------------
2768 declare i32 @llvm.amdgcn.permlanex16(i32, i32, i32, i32, i1 immarg, i1 immarg)
; permlanex16 with both trailing i1 immediates false: the CHECK lines expect
; the constant first operand (12345) to be kept and the call left unchanged.
2770 define amdgpu_kernel void @permlanex16(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
2771 ; CHECK-LABEL: @permlanex16(
2772 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 false)
2773 ; CHECK-NEXT: store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
2774 ; CHECK-NEXT: ret void
2776 %res = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
2777 store i32 %res, i32 addrspace(1)* %out
; permlanex16 with the trailing i1 immediates (false, true): the CHECK lines
; expect the constant first operand (12345) to be replaced with undef.
2781 define amdgpu_kernel void @permlanex16_bound_ctrl(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
2782 ; CHECK-LABEL: @permlanex16_bound_ctrl(
2783 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 true)
2784 ; CHECK-NEXT: store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
2785 ; CHECK-NEXT: ret void
2787 %res = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 true)
2788 store i32 %res, i32 addrspace(1)* %out
; permlanex16 with both trailing i1 immediates true: the CHECK lines expect
; the constant first operand (12345) to be replaced with undef.
2792 define amdgpu_kernel void @permlanex16_fetch_invalid_bound_ctrl(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
2793 ; CHECK-LABEL: @permlanex16_fetch_invalid_bound_ctrl(
2794 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 true, i1 true)
2795 ; CHECK-NEXT: store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
2796 ; CHECK-NEXT: ret void
2798 %res = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 true)
2799 store i32 %res, i32 addrspace(1)* %out
2803 ; --------------------------------------------------------------------
2804 ; llvm.amdgcn.image.sample a16
2805 ; --------------------------------------------------------------------
2807 declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2808 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2809 declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2810 declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2811 declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2812 declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2814 declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2815 declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2816 declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2817 declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2818 declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2819 declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2821 declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2822 declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2823 declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2824 declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2825 declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2826 declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2827 declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2828 declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2830 declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2831 declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2832 declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2833 declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2834 declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2835 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2836 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2837 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2838 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2840 declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2841 declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2842 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2843 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2844 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2845 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2846 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2847 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2849 declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2850 declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2851 declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2852 declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2854 declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2855 declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2856 declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2857 declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2859 declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2860 declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; image.sample.1d fed by an fpext of the half %s: the CHECK lines expect a
; direct call to the .f16 overload that takes %s as half.
2862 define amdgpu_kernel void @image_sample_a16_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
2863 ; CHECK-LABEL: @image_sample_a16_1d(
2864 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2865 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2866 ; CHECK-NEXT: ret void
2868 %s32 = fpext half %s to float
2869 %res = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2870 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.2d fed by fpext of the halves %s and %t: the CHECK lines expect
; a direct call to the .f16 overload that takes both values as half.
2874 define amdgpu_kernel void @image_sample_a16_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
2875 ; CHECK-LABEL: @image_sample_a16_2d(
2876 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2877 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2878 ; CHECK-NEXT: ret void
2880 %s32 = fpext half %s to float
2881 %t32 = fpext half %t to float
2882 %res = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2883 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.3d fed by fpext of the halves %s, %t and %r: the CHECK lines
; expect a direct call to the .f16 overload that takes all three as half.
2887 define amdgpu_kernel void @image_sample_a16_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
2888 ; CHECK-LABEL: @image_sample_a16_3d(
2889 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2890 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2891 ; CHECK-NEXT: ret void
2893 %s32 = fpext half %s to float
2894 %t32 = fpext half %t to float
2895 %r32 = fpext half %r to float
2896 %res = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2897 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.cube fed by fpext of the halves %s, %t and %face: the CHECK
; lines expect a direct call to the .f16 overload that takes all three as half.
2901 define amdgpu_kernel void @image_sample_a16_cube(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
2903 ; CHECK-LABEL: @image_sample_a16_cube(
2904 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2905 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2906 ; CHECK-NEXT: ret void
2908 %s32 = fpext half %s to float
2909 %t32 = fpext half %t to float
2910 %face32 = fpext half %face to float
2911 %res = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2912 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.1darray fed by fpext of the halves %s and %slice: the CHECK
; lines expect a direct call to the .f16 overload that takes both as half.
2916 define amdgpu_kernel void @image_sample_a16_1darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
2917 ; CHECK-LABEL: @image_sample_a16_1darray(
2918 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2919 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2920 ; CHECK-NEXT: ret void
2922 %s32 = fpext half %s to float
2923 %slice32 = fpext half %slice to float
2924 %res = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2925 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.2darray fed by fpext of the halves %s, %t and %slice: the CHECK
; lines expect a direct call to the .f16 overload that takes all three as half.
2929 define amdgpu_kernel void @image_sample_a16_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
2930 ; CHECK-LABEL: @image_sample_a16_2darray(
2931 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2932 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2933 ; CHECK-NEXT: ret void
2935 %s32 = fpext half %s to float
2936 %t32 = fpext half %t to float
2937 %slice32 = fpext half %slice to float
2938 %res = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2939 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.c.1d where %zcompare is a plain float and %s is an fpext'ed
; half: the CHECK lines expect the .f16 overload with %zcompare still passed
; as float and %s passed as half.
2943 define amdgpu_kernel void @image_sample_a16_c_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
2944 ; CHECK-LABEL: @image_sample_a16_c_1d(
2945 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2946 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2947 ; CHECK-NEXT: ret void
2949 %s32 = fpext half %s to float
2950 %res = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2951 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.c.2d where %zcompare is a plain float and %s, %t are fpext'ed
; halves: the CHECK lines expect the .f16 overload with %zcompare still passed
; as float and %s, %t passed as half.
2955 define amdgpu_kernel void @image_sample_a16_c_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
2956 ; CHECK-LABEL: @image_sample_a16_c_2d(
2957 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2958 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2959 ; CHECK-NEXT: ret void
2961 %s32 = fpext half %s to float
2962 %t32 = fpext half %t to float
2963 %res = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2964 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.cl.1d fed by fpext of the halves %s and %clamp: the CHECK lines
; expect a direct call to the .f16 overload that takes both as half.
2968 define amdgpu_kernel void @image_sample_a16_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
2969 ; CHECK-LABEL: @image_sample_a16_cl_1d(
2970 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2971 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2972 ; CHECK-NEXT: ret void
2974 %s32 = fpext half %s to float
2975 %clamp32 = fpext half %clamp to float
2976 %res = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2977 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.cl.2d fed by fpext of the halves %s, %t and %clamp: the CHECK
; lines expect a direct call to the .f16 overload that takes all three as half.
2981 define amdgpu_kernel void @image_sample_a16_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
2982 ; CHECK-LABEL: @image_sample_a16_cl_2d(
2983 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2984 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2985 ; CHECK-NEXT: ret void
2987 %s32 = fpext half %s to float
2988 %t32 = fpext half %t to float
2989 %clamp32 = fpext half %clamp to float
2990 %res = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2991 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.c.cl.1d where %zcompare is a plain float and %s, %clamp are
; fpext'ed halves: the CHECK lines expect the .f16 overload with %zcompare
; still passed as float and %s, %clamp passed as half.
2995 define amdgpu_kernel void @image_sample_a16_c_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
2996 ; CHECK-LABEL: @image_sample_a16_c_cl_1d(
2997 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
2998 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
2999 ; CHECK-NEXT: ret void
3001 %s32 = fpext half %s to float
3002 %clamp32 = fpext half %clamp to float
3003 %res = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3004 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.c.cl.2d where %zcompare is a plain float and %s, %t, %clamp are
; fpext'ed halves: the CHECK lines expect the .f16 overload with %zcompare
; still passed as float and the other three passed as half.
3008 define amdgpu_kernel void @image_sample_a16_c_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
3009 ; CHECK-LABEL: @image_sample_a16_c_cl_2d(
3010 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3011 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3012 ; CHECK-NEXT: ret void
3014 %s32 = fpext half %s to float
3015 %t32 = fpext half %t to float
3016 %clamp32 = fpext half %clamp to float
3017 %res = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3018 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.b.1d where %bias is a plain float and %s is an fpext'ed half:
; the CHECK lines expect the .f32.f16 overload with %bias still passed as
; float and %s passed as half.
3022 define amdgpu_kernel void @image_sample_a16_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {
3023 ; CHECK-LABEL: @image_sample_a16_b_1d(
3024 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3025 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3026 ; CHECK-NEXT: ret void
3028 %s32 = fpext half %s to float
3029 %res = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3030 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.b.2d where %bias is a plain float and %s, %t are fpext'ed
; halves: the CHECK lines expect the .f32.f16 overload with %bias still passed
; as float and %s, %t passed as half.
3034 define amdgpu_kernel void @image_sample_a16_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
3035 ; CHECK-LABEL: @image_sample_a16_b_2d(
3036 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3037 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3038 ; CHECK-NEXT: ret void
3040 %s32 = fpext half %s to float
3041 %t32 = fpext half %t to float
3042 %res = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3043 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.c.b.1d where %bias and %zcompare are plain floats and %s is an
; fpext'ed half: the CHECK lines expect the .f32.f16 overload with %bias and
; %zcompare still passed as float and %s passed as half.
3047 define amdgpu_kernel void @image_sample_a16_c_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) {
3048 ; CHECK-LABEL: @image_sample_a16_c_b_1d(
3049 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3050 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3051 ; CHECK-NEXT: ret void
3053 %s32 = fpext half %s to float
3054 %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3055 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.c.b.2d where %bias and %zcompare are plain floats and %s, %t
; are fpext'ed halves: the CHECK lines expect the .f32.f16 overload with %bias
; and %zcompare still passed as float and %s, %t passed as half.
3059 define amdgpu_kernel void @image_sample_a16_c_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
3060 ; CHECK-LABEL: @image_sample_a16_c_b_2d(
3061 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3062 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3063 ; CHECK-NEXT: ret void
3065 %s32 = fpext half %s to float
3066 %t32 = fpext half %t to float
3067 %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3068 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.b.cl.1d where %bias is a plain float and %s, %clamp are
; fpext'ed halves: the CHECK lines expect the .f32.f16 overload with %bias
; still passed as float and %s, %clamp passed as half.
3072 define amdgpu_kernel void @image_sample_a16_b_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) {
3073 ; CHECK-LABEL: @image_sample_a16_b_cl_1d(
3074 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3075 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3076 ; CHECK-NEXT: ret void
3078 %s32 = fpext half %s to float
3079 %clamp32 = fpext half %clamp to float
3080 %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3081 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.b.cl.2d where %bias is a plain float and %s, %t, %clamp are
; fpext'ed halves: the CHECK lines expect the .f32.f16 overload with %bias
; still passed as float and the other three passed as half.
3085 define amdgpu_kernel void @image_sample_a16_b_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
3086 ; CHECK-LABEL: @image_sample_a16_b_cl_2d(
3087 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3088 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3089 ; CHECK-NEXT: ret void
3091 %s32 = fpext half %s to float
3092 %t32 = fpext half %t to float
3093 %clamp32 = fpext half %clamp to float
3094 %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3095 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.c.b.cl.1d where %bias and %zcompare are plain floats and %s,
; %clamp are fpext'ed halves: the CHECK lines expect the .f32.f16 overload
; with %bias and %zcompare still passed as float and %s, %clamp passed as half.
3099 define amdgpu_kernel void @image_sample_a16_c_b_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) {
3100 ; CHECK-LABEL: @image_sample_a16_c_b_cl_1d(
3101 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3102 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3103 ; CHECK-NEXT: ret void
3105 %s32 = fpext half %s to float
3106 %clamp32 = fpext half %clamp to float
3107 %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3108 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.c.b.cl.2d where %bias and %zcompare are plain floats and %s,
; %t, %clamp are fpext'ed halves: the CHECK lines expect the .f32.f16 overload
; with %bias and %zcompare still passed as float and the other three as half.
3112 define amdgpu_kernel void @image_sample_a16_c_b_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
3113 ; CHECK-LABEL: @image_sample_a16_c_b_cl_2d(
3114 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3115 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3116 ; CHECK-NEXT: ret void
3118 %s32 = fpext half %s to float
3119 %t32 = fpext half %t to float
3120 %clamp32 = fpext half %clamp to float
3121 %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3122 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.d.1d where all three float operands (%dsdh, %dsdv, %s) are
; fpext'ed halves: the CHECK lines expect a direct call to the .f16.f16
; overload that takes all three as half.
3126 define amdgpu_kernel void @image_sample_a16_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
3127 ; CHECK-LABEL: @image_sample_a16_d_1d(
3128 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3129 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3130 ; CHECK-NEXT: ret void
3132 %dsdh32 = fpext half %dsdh to float
3133 %dsdv32 = fpext half %dsdv to float
3134 %s32 = fpext half %s to float
3135 %res = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3136 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.d.2d where all six float operands (%dsdh, %dtdh, %dsdv, %dtdv,
; %s, %t) are fpext'ed halves: the CHECK lines expect a direct call to the
; .f16.f16 overload that takes all six as half.
3140 define amdgpu_kernel void @image_sample_a16_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
3141 ; CHECK-LABEL: @image_sample_a16_d_2d(
3142 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3143 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3144 ; CHECK-NEXT: ret void
3146 %dsdh32 = fpext half %dsdh to float
3147 %dtdh32 = fpext half %dtdh to float
3148 %dsdv32 = fpext half %dsdv to float
3149 %dtdv32 = fpext half %dtdv to float
3150 %s32 = fpext half %s to float
3151 %t32 = fpext half %t to float
3152 %res = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3153 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.d.3d where all nine float operands (%dsdh, %dtdh, %drdh, %dsdv,
; %dtdv, %drdv, %s, %t, %r) are fpext'ed halves: the CHECK lines expect a
; direct call to the .f16.f16 overload that takes all nine as half.
3157 define amdgpu_kernel void @image_sample_a16_d_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
3158 ; CHECK-LABEL: @image_sample_a16_d_3d(
3159 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3160 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3161 ; CHECK-NEXT: ret void
3163 %dsdh32 = fpext half %dsdh to float
3164 %dtdh32 = fpext half %dtdh to float
3165 %drdh32 = fpext half %drdh to float
3166 %dsdv32 = fpext half %dsdv to float
3167 %dtdv32 = fpext half %dtdv to float
3168 %drdv32 = fpext half %drdv to float
3169 %s32 = fpext half %s to float
3170 %t32 = fpext half %t to float
3171 %r32 = fpext half %r to float
3172 %res = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %drdh32, float %dsdv32, float %dtdv32, float %drdv32, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3173 store <4 x float> %res, <4 x float> addrspace(1)* %out
; image.sample.c.d.1d where %zcompare is a plain float and %dsdh, %dsdv, %s
; are fpext'ed halves: the CHECK lines expect the .f16.f16 overload with
; %zcompare still passed as float and the other three passed as half.
3177 define amdgpu_kernel void @image_sample_a16_c_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
3178 ; CHECK-LABEL: @image_sample_a16_c_d_1d(
3179 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3180 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3181 ; CHECK-NEXT: ret void
3183 %dsdh32 = fpext half %dsdh to float
3184 %dsdv32 = fpext half %dsdv to float
3185 %s32 = fpext half %s to float
3186 %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3187 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.c.d.2d: half arguments %dsdh, %dtdh, %dsdv, %dtdv,
; %s and %t are fpext'ed to float and passed to the .f32.f32 overload;
; %zcompare is already float. The assertions expect a single call to the
; .f16.f16 overload with %zcompare still float and the rest as half.
3191 define amdgpu_kernel void @image_sample_a16_c_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
3192 ; CHECK-LABEL: @image_sample_a16_c_d_2d(
3193 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3194 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3195 ; CHECK-NEXT: ret void
; Input IR: widen the six half arguments; %zcompare is passed through as-is.
3197 %dsdh32 = fpext half %dsdh to float
3198 %dtdh32 = fpext half %dtdh to float
3199 %dsdv32 = fpext half %dsdv to float
3200 %dtdv32 = fpext half %dtdv to float
3201 %s32 = fpext half %s to float
3202 %t32 = fpext half %t to float
3203 %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3204 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.d.cl.1d: half arguments %dsdh, %dsdv, %s and %clamp
; are fpext'ed to float and passed to the .f32.f32 overload. The assertions
; expect a single call to the .f16.f16 overload on the original half values,
; with no fpext left in the output.
3208 define amdgpu_kernel void @image_sample_a16_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
3209 ; CHECK-LABEL: @image_sample_a16_d_cl_1d(
3210 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3211 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3212 ; CHECK-NEXT: ret void
; Input IR: widen every half argument to float, then sample via the f32 overload.
3214 %dsdh32 = fpext half %dsdh to float
3215 %dsdv32 = fpext half %dsdv to float
3216 %s32 = fpext half %s to float
3217 %clamp32 = fpext half %clamp to float
3218 %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3219 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.d.cl.2d: half arguments %dsdh, %dtdh, %dsdv, %dtdv,
; %s, %t and %clamp are fpext'ed to float and passed to the .f32.f32 overload.
; The assertions expect a single call to the .f16.f16 overload on the original
; half values, with no fpext left in the output.
3223 define amdgpu_kernel void @image_sample_a16_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
3224 ; CHECK-LABEL: @image_sample_a16_d_cl_2d(
3225 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3226 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3227 ; CHECK-NEXT: ret void
; Input IR: widen every half argument to float, then sample via the f32 overload.
3229 %dsdh32 = fpext half %dsdh to float
3230 %dtdh32 = fpext half %dtdh to float
3231 %dsdv32 = fpext half %dsdv to float
3232 %dtdv32 = fpext half %dtdv to float
3233 %s32 = fpext half %s to float
3234 %t32 = fpext half %t to float
3235 %clamp32 = fpext half %clamp to float
3236 %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3237 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.c.d.cl.1d: half arguments %dsdh, %dsdv, %s and
; %clamp are fpext'ed to float and passed to the .f32.f32 overload; %zcompare
; is already float. The assertions expect a single call to the .f16.f16
; overload with %zcompare still float and the rest as half.
3241 define amdgpu_kernel void @image_sample_a16_c_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
3242 ; CHECK-LABEL: @image_sample_a16_c_d_cl_1d(
3243 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3244 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3245 ; CHECK-NEXT: ret void
; Input IR: widen the four half arguments; %zcompare is passed through as-is.
3247 %dsdh32 = fpext half %dsdh to float
3248 %dsdv32 = fpext half %dsdv to float
3249 %s32 = fpext half %s to float
3250 %clamp32 = fpext half %clamp to float
3251 %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3252 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.c.d.cl.2d: half arguments %dsdh, %dtdh, %dsdv,
; %dtdv, %s, %t and %clamp are fpext'ed to float and passed to the .f32.f32
; overload; %zcompare is already float. The assertions expect a single call to
; the .f16.f16 overload with %zcompare still float and the rest as half.
3256 define amdgpu_kernel void @image_sample_a16_c_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
3257 ; CHECK-LABEL: @image_sample_a16_c_d_cl_2d(
3258 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3259 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3260 ; CHECK-NEXT: ret void
; Input IR: widen the seven half arguments; %zcompare is passed through as-is.
3262 %dsdh32 = fpext half %dsdh to float
3263 %dtdh32 = fpext half %dtdh to float
3264 %dsdv32 = fpext half %dsdv to float
3265 %dtdv32 = fpext half %dtdv to float
3266 %s32 = fpext half %s to float
3267 %t32 = fpext half %t to float
3268 %clamp32 = fpext half %clamp to float
3269 %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3270 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.cd.1d: half arguments %dsdh, %dsdv and %s are
; fpext'ed to float and passed to the .f32.f32 overload. The assertions expect
; a single call to the .f16.f16 overload on the original half values, with no
; fpext left in the output.
3274 define amdgpu_kernel void @image_sample_a16_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
3275 ; CHECK-LABEL: @image_sample_a16_cd_1d(
3276 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3277 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3278 ; CHECK-NEXT: ret void
; Input IR: widen every half argument to float, then sample via the f32 overload.
3280 %dsdh32 = fpext half %dsdh to float
3281 %dsdv32 = fpext half %dsdv to float
3282 %s32 = fpext half %s to float
3283 %res = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3284 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.cd.2d: half arguments %dsdh, %dtdh, %dsdv, %dtdv,
; %s and %t are fpext'ed to float and passed to the .f32.f32 overload. The
; assertions expect a single call to the .f16.f16 overload on the original
; half values, with no fpext left in the output.
3288 define amdgpu_kernel void @image_sample_a16_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
3289 ; CHECK-LABEL: @image_sample_a16_cd_2d(
3290 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3291 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3292 ; CHECK-NEXT: ret void
; Input IR: widen every half argument to float, then sample via the f32 overload.
3294 %dsdh32 = fpext half %dsdh to float
3295 %dtdh32 = fpext half %dtdh to float
3296 %dsdv32 = fpext half %dsdv to float
3297 %dtdv32 = fpext half %dtdv to float
3298 %s32 = fpext half %s to float
3299 %t32 = fpext half %t to float
3300 %res = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3301 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.c.cd.1d: half arguments %dsdh, %dsdv and %s are
; fpext'ed to float and passed to the .f32.f32 overload; %zcompare is already
; float. The assertions expect a single call to the .f16.f16 overload with
; %zcompare still float and the rest as half.
3305 define amdgpu_kernel void @image_sample_a16_c_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
3306 ; CHECK-LABEL: @image_sample_a16_c_cd_1d(
3307 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3308 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3309 ; CHECK-NEXT: ret void
; Input IR: widen the three half arguments; %zcompare is passed through as-is.
3311 %dsdh32 = fpext half %dsdh to float
3312 %dsdv32 = fpext half %dsdv to float
3313 %s32 = fpext half %s to float
3314 %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3315 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.c.cd.2d: half arguments %dsdh, %dtdh, %dsdv, %dtdv,
; %s and %t are fpext'ed to float and passed to the .f32.f32 overload;
; %zcompare is already float. The assertions expect a single call to the
; .f16.f16 overload with %zcompare still float and the rest as half.
3319 define amdgpu_kernel void @image_sample_a16_c_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
3320 ; CHECK-LABEL: @image_sample_a16_c_cd_2d(
3321 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3322 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3323 ; CHECK-NEXT: ret void
; Input IR: widen the six half arguments; %zcompare is passed through as-is.
3325 %dsdh32 = fpext half %dsdh to float
3326 %dtdh32 = fpext half %dtdh to float
3327 %dsdv32 = fpext half %dsdv to float
3328 %dtdv32 = fpext half %dtdv to float
3329 %s32 = fpext half %s to float
3330 %t32 = fpext half %t to float
3331 %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3332 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.cd.cl.1d: half arguments %dsdh, %dsdv, %s and
; %clamp are fpext'ed to float and passed to the .f32.f32 overload. The
; assertions expect a single call to the .f16.f16 overload on the original
; half values, with no fpext left in the output.
3336 define amdgpu_kernel void @image_sample_a16_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
3337 ; CHECK-LABEL: @image_sample_a16_cd_cl_1d(
3338 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3339 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3340 ; CHECK-NEXT: ret void
; Input IR: widen every half argument to float, then sample via the f32 overload.
3342 %dsdh32 = fpext half %dsdh to float
3343 %dsdv32 = fpext half %dsdv to float
3344 %s32 = fpext half %s to float
3345 %clamp32 = fpext half %clamp to float
3346 %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3347 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.cd.cl.2d: half arguments %dsdh, %dtdh, %dsdv,
; %dtdv, %s, %t and %clamp are fpext'ed to float and passed to the .f32.f32
; overload. The assertions expect a single call to the .f16.f16 overload on
; the original half values, with no fpext left in the output.
3351 define amdgpu_kernel void @image_sample_a16_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
3352 ; CHECK-LABEL: @image_sample_a16_cd_cl_2d(
3353 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3354 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3355 ; CHECK-NEXT: ret void
; Input IR: widen every half argument to float, then sample via the f32 overload.
3357 %dsdh32 = fpext half %dsdh to float
3358 %dtdh32 = fpext half %dtdh to float
3359 %dsdv32 = fpext half %dsdv to float
3360 %dtdv32 = fpext half %dtdv to float
3361 %s32 = fpext half %s to float
3362 %t32 = fpext half %t to float
3363 %clamp32 = fpext half %clamp to float
3364 %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3365 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.c.cd.cl.1d: half arguments %dsdh, %dsdv, %s and
; %clamp are fpext'ed to float and passed to the .f32.f32 overload; %zcompare
; is already float. The assertions expect a single call to the .f16.f16
; overload with %zcompare still float and the rest as half.
3369 define amdgpu_kernel void @image_sample_a16_c_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
3370 ; CHECK-LABEL: @image_sample_a16_c_cd_cl_1d(
3371 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3372 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3373 ; CHECK-NEXT: ret void
; Input IR: widen the four half arguments; %zcompare is passed through as-is.
3375 %dsdh32 = fpext half %dsdh to float
3376 %dsdv32 = fpext half %dsdv to float
3377 %s32 = fpext half %s to float
3378 %clamp32 = fpext half %clamp to float
3379 %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3380 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.c.cd.cl.2d: half arguments %dsdh, %dtdh, %dsdv,
; %dtdv, %s, %t and %clamp are fpext'ed to float and passed to the .f32.f32
; overload; %zcompare is already float. The assertions expect a single call to
; the .f16.f16 overload with %zcompare still float and the rest as half.
3384 define amdgpu_kernel void @image_sample_a16_c_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
3385 ; CHECK-LABEL: @image_sample_a16_c_cd_cl_2d(
3386 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3387 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3388 ; CHECK-NEXT: ret void
; Input IR: widen the seven half arguments; %zcompare is passed through as-is.
3390 %dsdh32 = fpext half %dsdh to float
3391 %dtdh32 = fpext half %dtdh to float
3392 %dsdv32 = fpext half %dsdv to float
3393 %dtdv32 = fpext half %dtdv to float
3394 %s32 = fpext half %s to float
3395 %t32 = fpext half %t to float
3396 %clamp32 = fpext half %clamp to float
3397 %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3398 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.l.1d: half arguments %s and %lod are fpext'ed to
; float and passed to the .v4f32.f32 overload. The assertions expect a single
; call to the .v4f32.f16 overload on the original half values, with no fpext
; left in the output.
3402 define amdgpu_kernel void @image_sample_a16_l_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
3403 ; CHECK-LABEL: @image_sample_a16_l_1d(
3404 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3405 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3406 ; CHECK-NEXT: ret void
; Input IR: widen both half arguments to float, then sample via the f32 overload.
3408 %s32 = fpext half %s to float
3409 %lod32 = fpext half %lod to float
3410 %res = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3411 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.l.2d: half arguments %s, %t and %lod are fpext'ed
; to float and passed to the .v4f32.f32 overload. The assertions expect a
; single call to the .v4f32.f16 overload on the original half values, with no
; fpext left in the output.
3415 define amdgpu_kernel void @image_sample_a16_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
3416 ; CHECK-LABEL: @image_sample_a16_l_2d(
3417 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3418 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3419 ; CHECK-NEXT: ret void
; Input IR: widen every half argument to float, then sample via the f32 overload.
3421 %s32 = fpext half %s to float
3422 %t32 = fpext half %t to float
3423 %lod32 = fpext half %lod to float
3424 %res = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s32, float %t32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3425 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.c.l.1d: half arguments %s and %lod are fpext'ed to
; float and passed to the .v4f32.f32 overload; %zcompare is already float. The
; assertions expect a single call to the .v4f32.f16 overload with %zcompare
; still float and %s/%lod as half.
3429 define amdgpu_kernel void @image_sample_a16_c_l_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
3430 ; CHECK-LABEL: @image_sample_a16_c_l_1d(
3431 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3432 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3433 ; CHECK-NEXT: ret void
; Input IR: widen the two half arguments; %zcompare is passed through as-is.
3435 %s32 = fpext half %s to float
3436 %lod32 = fpext half %lod to float
3437 %res = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3438 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.c.l.2d: half arguments %s, %t and %lod are
; fpext'ed to float and passed to the .v4f32.f32 overload; %zcompare is
; already float. The assertions expect a single call to the .v4f32.f16
; overload with %zcompare still float and the rest as half.
3442 define amdgpu_kernel void @image_sample_a16_c_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
3443 ; CHECK-LABEL: @image_sample_a16_c_l_2d(
3444 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3445 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3446 ; CHECK-NEXT: ret void
; Input IR: widen the three half arguments; %zcompare is passed through as-is.
3448 %s32 = fpext half %s to float
3449 %t32 = fpext half %t to float
3450 %lod32 = fpext half %lod to float
3451 %res = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3452 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.lz.1d: the single half argument %s is fpext'ed to
; float and passed to the .v4f32.f32 overload. The assertions expect a single
; call to the .v4f32.f16 overload taking %s as half, with no fpext left.
3456 define amdgpu_kernel void @image_sample_a16_lz_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
3457 ; CHECK-LABEL: @image_sample_a16_lz_1d(
3458 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3459 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3460 ; CHECK-NEXT: ret void
; Input IR: widen %s to float, then sample via the f32 overload.
3462 %s32 = fpext half %s to float
3463 %res = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3464 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.lz.2d: half arguments %s and %t are fpext'ed to
; float and passed to the .v4f32.f32 overload. The assertions expect a single
; call to the .v4f32.f16 overload on the original half values, with no fpext
; left in the output.
3468 define amdgpu_kernel void @image_sample_a16_lz_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
3469 ; CHECK-LABEL: @image_sample_a16_lz_2d(
3470 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3471 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3472 ; CHECK-NEXT: ret void
; Input IR: widen both half arguments to float, then sample via the f32 overload.
3474 %s32 = fpext half %s to float
3475 %t32 = fpext half %t to float
3476 %res = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3477 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.c.lz.1d: the half argument %s is fpext'ed to float
; and passed to the .v4f32.f32 overload; %zcompare is already float. The
; assertions expect a single call to the .v4f32.f16 overload with %zcompare
; still float and %s as half.
3481 define amdgpu_kernel void @image_sample_a16_c_lz_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
3482 ; CHECK-LABEL: @image_sample_a16_c_lz_1d(
3483 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3484 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3485 ; CHECK-NEXT: ret void
; Input IR: widen %s to float; %zcompare is passed through as-is.
3487 %s32 = fpext half %s to float
3488 %res = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3489 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.c.lz.2d: half arguments %s and %t are fpext'ed to
; float and passed to the .v4f32.f32 overload; %zcompare is already float. The
; assertions expect a single call to the .v4f32.f16 overload with %zcompare
; still float and %s/%t as half.
3493 define amdgpu_kernel void @image_sample_a16_c_lz_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
3494 ; CHECK-LABEL: @image_sample_a16_c_lz_2d(
3495 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3496 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3497 ; CHECK-NEXT: ret void
; Input IR: widen the two half arguments; %zcompare is passed through as-is.
3499 %s32 = fpext half %s to float
3500 %t32 = fpext half %t to float
3501 %res = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3502 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold for image.sample.c.d.o.2darray returning a scalar float (first i32
; operand is 4): half arguments %dsdh, %dtdh, %dsdv, %dtdv, %s, %t and %slice
; are fpext'ed to float and passed to the .f32.f32.f32 overload; i32 %offset
; and float %zcompare are passed as given. The assertions expect a single call
; to the .f32.f16.f16 overload with those seven operands as half.
3506 define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V1(float addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
3507 ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_V1(
3508 ; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3509 ; CHECK-NEXT: store float [[TMP1]], float addrspace(1)* [[OUT:%.*]], align 4
3510 ; CHECK-NEXT: ret void
; Input IR: widen the seven half arguments; %offset and %zcompare are untouched.
3512 %dsdh32 = fpext half %dsdh to float
3513 %dtdh32 = fpext half %dtdh to float
3514 %dsdv32 = fpext half %dsdv to float
3515 %dtdv32 = fpext half %dtdv to float
3516 %s32 = fpext half %s to float
3517 %t32 = fpext half %t to float
3518 %slice32 = fpext half %slice to float
3519 %res = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3520 store float %res, float addrspace(1)* %out
; a16 fold for image.sample.c.d.o.2darray returning <2 x float> (first i32
; operand is 6): half arguments %dsdh, %dtdh, %dsdv, %dtdv, %s, %t and %slice
; are fpext'ed to float and passed to the .v2f32.f32.f32 overload; i32 %offset
; and float %zcompare are passed as given. The assertions expect a single call
; to the .v2f32.f16.f16 overload with those seven operands as half.
3524 define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V2(<2 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
3525 ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_V2(
3526 ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f16(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3527 ; CHECK-NEXT: store <2 x float> [[TMP1]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8
3528 ; CHECK-NEXT: ret void
; Input IR: widen the seven half arguments; %offset and %zcompare are untouched.
3530 %dsdh32 = fpext half %dsdh to float
3531 %dtdh32 = fpext half %dtdh to float
3532 %dsdv32 = fpext half %dsdv to float
3533 %dtdv32 = fpext half %dtdv to float
3534 %s32 = fpext half %s to float
3535 %t32 = fpext half %t to float
3536 %slice32 = fpext half %slice to float
3537 %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3538 store <2 x float> %res, <2 x float> addrspace(1)* %out
3542 ; --------------------------------------------------------------------
3543 ; llvm.amdgcn.image.sample g16
3544 ; --------------------------------------------------------------------
; g16 fold for image.sample.d.1d: only %dsdh and %dsdv are half (fpext'ed to
; float before the call); %s is a float argument. The assertions expect a
; single call to the .f16.f32 overload that takes %dsdh/%dsdv as half and
; keeps %s as float, with no fpext left in the output.
3546 define amdgpu_kernel void @image_sample_g16_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
3547 ; CHECK-LABEL: @image_sample_g16_d_1d(
3548 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3549 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3550 ; CHECK-NEXT: ret void
; Input IR: widen the two half arguments; %s is passed through as float.
3552 %dsdh32 = fpext half %dsdh to float
3553 %dsdv32 = fpext half %dsdv to float
3554 %res = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3555 store <4 x float> %res, <4 x float> addrspace(1)* %out
; g16 fold for image.sample.d.2d: only %dsdh, %dtdh, %dsdv and %dtdv are half
; (fpext'ed to float before the call); %s and %t are float arguments. The
; assertions expect a single call to the .f16.f32 overload that takes the four
; half operands directly and keeps %s/%t as float.
3559 define amdgpu_kernel void @image_sample_g16_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
3560 ; CHECK-LABEL: @image_sample_g16_d_2d(
3561 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3562 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3563 ; CHECK-NEXT: ret void
; Input IR: widen the four half arguments; %s and %t are passed through as float.
3565 %dsdh32 = fpext half %dsdh to float
3566 %dtdh32 = fpext half %dtdh to float
3567 %dsdv32 = fpext half %dsdv to float
3568 %dtdv32 = fpext half %dtdv to float
3569 %res = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3570 store <4 x float> %res, <4 x float> addrspace(1)* %out
; g16 fold for image.sample.d.3d: only %dsdh, %dtdh, %drdh, %dsdv, %dtdv and
; %drdv are half (fpext'ed to float before the call); %s, %t and %r are float
; arguments. The assertions expect a single call to the .f16.f32 overload that
; takes the six half operands directly and keeps %s/%t/%r as float.
3574 define amdgpu_kernel void @image_sample_g16_d_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
3575 ; CHECK-LABEL: @image_sample_g16_d_3d(
3576 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3577 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3578 ; CHECK-NEXT: ret void
; Input IR: widen the six half arguments; %s, %t and %r are passed through as float.
3580 %dsdh32 = fpext half %dsdh to float
3581 %dtdh32 = fpext half %dtdh to float
3582 %drdh32 = fpext half %drdh to float
3583 %dsdv32 = fpext half %dsdv to float
3584 %dtdv32 = fpext half %dtdv to float
3585 %drdv32 = fpext half %drdv to float
3586 %res = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %drdh32, float %dsdv32, float %dtdv32, float %drdv32, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3587 store <4 x float> %res, <4 x float> addrspace(1)* %out
; g16 fold for image.sample.c.d.1d: only %dsdh and %dsdv are half (fpext'ed to
; float before the call); %zcompare and %s are float arguments. The assertions
; expect a single call to the .f16.f32 overload that takes %dsdh/%dsdv as half
; and keeps %zcompare and %s as float.
3591 define amdgpu_kernel void @image_sample_g16_c_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
3592 ; CHECK-LABEL: @image_sample_g16_c_d_1d(
3593 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3594 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3595 ; CHECK-NEXT: ret void
; Input IR: widen the two half arguments; %zcompare and %s stay float.
3597 %dsdh32 = fpext half %dsdh to float
3598 %dsdv32 = fpext half %dsdv to float
3599 %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3600 store <4 x float> %res, <4 x float> addrspace(1)* %out
; g16 fold for image.sample.c.d.2d: only %dsdh, %dtdh, %dsdv and %dtdv are
; half (fpext'ed to float before the call); %zcompare, %s and %t are float
; arguments. The assertions expect a single call to the .f16.f32 overload that
; takes the four half operands directly and keeps the float operands as float.
3604 define amdgpu_kernel void @image_sample_g16_c_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
3605 ; CHECK-LABEL: @image_sample_g16_c_d_2d(
3606 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3607 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3608 ; CHECK-NEXT: ret void
; Input IR: widen the four half arguments; %zcompare, %s and %t stay float.
3610 %dsdh32 = fpext half %dsdh to float
3611 %dtdh32 = fpext half %dtdh to float
3612 %dsdv32 = fpext half %dsdv to float
3613 %dtdv32 = fpext half %dtdv to float
3614 %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3615 store <4 x float> %res, <4 x float> addrspace(1)* %out
; g16 fold for image.sample.d.cl.1d: only %dsdh and %dsdv are half (fpext'ed
; to float before the call); %s and %clamp are float arguments. The assertions
; expect a single call to the .f16.f32 overload that takes %dsdh/%dsdv as half
; and keeps %s and %clamp as float.
3619 define amdgpu_kernel void @image_sample_g16_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
3620 ; CHECK-LABEL: @image_sample_g16_d_cl_1d(
3621 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3622 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3623 ; CHECK-NEXT: ret void
; Input IR: widen the two half arguments; %s and %clamp stay float.
3625 %dsdh32 = fpext half %dsdh to float
3626 %dsdv32 = fpext half %dsdv to float
3627 %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3628 store <4 x float> %res, <4 x float> addrspace(1)* %out
; g16 fold for image.sample.d.cl.2d: only %dsdh, %dtdh, %dsdv and %dtdv are
; half (fpext'ed to float before the call); %s, %t and %clamp are float
; arguments. The assertions expect a single call to the .f16.f32 overload that
; takes the four half operands directly and keeps the float operands as float.
3632 define amdgpu_kernel void @image_sample_g16_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
3633 ; CHECK-LABEL: @image_sample_g16_d_cl_2d(
3634 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3635 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3636 ; CHECK-NEXT: ret void
; Input IR: widen the four half arguments; %s, %t and %clamp stay float.
3638 %dsdh32 = fpext half %dsdh to float
3639 %dtdh32 = fpext half %dtdh to float
3640 %dsdv32 = fpext half %dsdv to float
3641 %dtdv32 = fpext half %dtdv to float
3642 %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3643 store <4 x float> %res, <4 x float> addrspace(1)* %out
; g16 fold, sample.c.d.cl.1d: %dsdh and %dsdv reach the intrinsic only through
; fpext half -> float. The expected output calls the .f16.f32 overload on the
; original half values; %zcompare, %s and %clamp remain float operands.
3647 define amdgpu_kernel void @image_sample_g16_c_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
3648 ; CHECK-LABEL: @image_sample_g16_c_d_cl_1d(
3649 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3650 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3651 ; CHECK-NEXT: ret void
3653 %dsdh32 = fpext half %dsdh to float
3654 %dsdv32 = fpext half %dsdv to float
3655 %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3656 store <4 x float> %res, <4 x float> addrspace(1)* %out
; g16 fold, sample.c.d.cl.2d: %dsdh, %dtdh, %dsdv and %dtdv reach the intrinsic
; only through fpext half -> float. The expected output calls the .f16.f32
; overload on the original half values; %zcompare, %s, %t and %clamp remain
; float operands.
3660 define amdgpu_kernel void @image_sample_g16_c_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
3661 ; CHECK-LABEL: @image_sample_g16_c_d_cl_2d(
3662 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3663 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3664 ; CHECK-NEXT: ret void
3666 %dsdh32 = fpext half %dsdh to float
3667 %dtdh32 = fpext half %dtdh to float
3668 %dsdv32 = fpext half %dsdv to float
3669 %dtdv32 = fpext half %dtdv to float
3670 %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3671 store <4 x float> %res, <4 x float> addrspace(1)* %out
; g16 fold, sample.cd.1d: %dsdh and %dsdv reach the intrinsic only through
; fpext half -> float. The expected output calls the .f16.f32 overload on the
; original half values; %s remains a float operand.
3675 define amdgpu_kernel void @image_sample_g16_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
3676 ; CHECK-LABEL: @image_sample_g16_cd_1d(
3677 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3678 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3679 ; CHECK-NEXT: ret void
3681 %dsdh32 = fpext half %dsdh to float
3682 %dsdv32 = fpext half %dsdv to float
3683 %res = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3684 store <4 x float> %res, <4 x float> addrspace(1)* %out
; g16 fold, sample.cd.2d: %dsdh, %dtdh, %dsdv and %dtdv reach the intrinsic
; only through fpext half -> float. The expected output calls the .f16.f32
; overload on the original half values; %s and %t remain float operands.
3688 define amdgpu_kernel void @image_sample_g16_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
3689 ; CHECK-LABEL: @image_sample_g16_cd_2d(
3690 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3691 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3692 ; CHECK-NEXT: ret void
3694 %dsdh32 = fpext half %dsdh to float
3695 %dtdh32 = fpext half %dtdh to float
3696 %dsdv32 = fpext half %dsdv to float
3697 %dtdv32 = fpext half %dtdv to float
3698 %res = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3699 store <4 x float> %res, <4 x float> addrspace(1)* %out
; g16 fold, sample.c.cd.1d: %dsdh and %dsdv reach the intrinsic only through
; fpext half -> float. The expected output calls the .f16.f32 overload on the
; original half values; %zcompare and %s remain float operands.
3703 define amdgpu_kernel void @image_sample_g16_c_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
3704 ; CHECK-LABEL: @image_sample_g16_c_cd_1d(
3705 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3706 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3707 ; CHECK-NEXT: ret void
3709 %dsdh32 = fpext half %dsdh to float
3710 %dsdv32 = fpext half %dsdv to float
3711 %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3712 store <4 x float> %res, <4 x float> addrspace(1)* %out
; g16 fold, sample.c.cd.2d: %dsdh, %dtdh, %dsdv and %dtdv reach the intrinsic
; only through fpext half -> float. The expected output calls the .f16.f32
; overload on the original half values; %zcompare, %s and %t remain float
; operands.
3716 define amdgpu_kernel void @image_sample_g16_c_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
3717 ; CHECK-LABEL: @image_sample_g16_c_cd_2d(
3718 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3719 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3720 ; CHECK-NEXT: ret void
3722 %dsdh32 = fpext half %dsdh to float
3723 %dtdh32 = fpext half %dtdh to float
3724 %dsdv32 = fpext half %dsdv to float
3725 %dtdv32 = fpext half %dtdv to float
3726 %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3727 store <4 x float> %res, <4 x float> addrspace(1)* %out
; g16 fold, sample.cd.cl.1d: %dsdh and %dsdv reach the intrinsic only through
; fpext half -> float. The expected output calls the .f16.f32 overload on the
; original half values; %s and %clamp remain float operands.
3731 define amdgpu_kernel void @image_sample_g16_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
3732 ; CHECK-LABEL: @image_sample_g16_cd_cl_1d(
3733 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3734 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3735 ; CHECK-NEXT: ret void
3737 %dsdh32 = fpext half %dsdh to float
3738 %dsdv32 = fpext half %dsdv to float
3739 %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3740 store <4 x float> %res, <4 x float> addrspace(1)* %out
; g16 fold, sample.cd.cl.2d: %dsdh, %dtdh, %dsdv and %dtdv reach the intrinsic
; only through fpext half -> float. The expected output calls the .f16.f32
; overload on the original half values; %s, %t and %clamp remain float operands.
3744 define amdgpu_kernel void @image_sample_g16_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
3745 ; CHECK-LABEL: @image_sample_g16_cd_cl_2d(
3746 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3747 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3748 ; CHECK-NEXT: ret void
3750 %dsdh32 = fpext half %dsdh to float
3751 %dtdh32 = fpext half %dtdh to float
3752 %dsdv32 = fpext half %dsdv to float
3753 %dtdv32 = fpext half %dtdv to float
3754 %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3755 store <4 x float> %res, <4 x float> addrspace(1)* %out
; g16 fold, sample.c.cd.cl.1d: %dsdh and %dsdv reach the intrinsic only through
; fpext half -> float. The expected output calls the .f16.f32 overload on the
; original half values; %zcompare, %s and %clamp remain float operands.
3759 define amdgpu_kernel void @image_sample_g16_c_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
3760 ; CHECK-LABEL: @image_sample_g16_c_cd_cl_1d(
3761 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3762 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3763 ; CHECK-NEXT: ret void
3765 %dsdh32 = fpext half %dsdh to float
3766 %dsdv32 = fpext half %dsdv to float
3767 %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3768 store <4 x float> %res, <4 x float> addrspace(1)* %out
; g16 fold, sample.c.cd.cl.2d: %dsdh, %dtdh, %dsdv and %dtdv reach the
; intrinsic only through fpext half -> float. The expected output calls the
; .f16.f32 overload on the original half values; %zcompare, %s, %t and %clamp
; remain float operands.
3772 define amdgpu_kernel void @image_sample_g16_c_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
3773 ; CHECK-LABEL: @image_sample_g16_c_cd_cl_2d(
3774 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3775 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3776 ; CHECK-NEXT: ret void
3778 %dsdh32 = fpext half %dsdh to float
3779 %dtdh32 = fpext half %dtdh to float
3780 %dsdv32 = fpext half %dsdv to float
3781 %dtdv32 = fpext half %dtdv to float
3782 %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3783 store <4 x float> %res, <4 x float> addrspace(1)* %out
; g16 fold, sample.c.d.o.2darray with a scalar float result (leading i32
; operand is 4): %dsdh, %dtdh, %dsdv and %dtdv reach the intrinsic only through
; fpext half -> float. The expected output calls the .f32.f16.f32 overload on
; the original half values; %offset stays i32 and %zcompare, %s, %t and %slice
; remain float operands.
3787 define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V1(float addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
3788 ; CHECK-LABEL: @image_sample_g16_c_d_o_2darray_V1(
3789 ; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3790 ; CHECK-NEXT: store float [[TMP1]], float addrspace(1)* [[OUT:%.*]], align 4
3791 ; CHECK-NEXT: ret void
3793 %dsdh32 = fpext half %dsdh to float
3794 %dtdh32 = fpext half %dtdh to float
3795 %dsdv32 = fpext half %dsdv to float
3796 %dtdv32 = fpext half %dtdv to float
3797 %res = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3798 store float %res, float addrspace(1)* %out
; g16 fold, sample.c.d.o.2darray with a <2 x float> result (leading i32
; operand is 6): %dsdh, %dtdh, %dsdv and %dtdv reach the intrinsic only through
; fpext half -> float. The expected output calls the .v2f32.f16.f32 overload on
; the original half values; %offset stays i32 and %zcompare, %s, %t and %slice
; remain float operands.
3802 define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V2(<2 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
3803 ; CHECK-LABEL: @image_sample_g16_c_d_o_2darray_V2(
3804 ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3805 ; CHECK-NEXT: store <2 x float> [[TMP1]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8
3806 ; CHECK-NEXT: ret void
3808 %dsdh32 = fpext half %dsdh to float
3809 %dtdh32 = fpext half %dtdh to float
3810 %dsdv32 = fpext half %dsdv to float
3811 %dtdv32 = fpext half %dtdv to float
3812 %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3813 store <2 x float> %res, <2 x float> addrspace(1)* %out
3817 ; --------------------------------------------------------------------
3818 ; llvm.amdgcn.image.sample a16: fast-math flags are preserved by the fold
3819 ; --------------------------------------------------------------------
; a16 fold with fast-math flags, sample.1d: %s is extended from half before a
; call that carries nnan. The expected output calls the .v4f32.f16 overload on
; %s directly and the rewritten call still carries nnan.
3821 define amdgpu_kernel void @image_sample_a16_1d_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
3822 ; CHECK-LABEL: @image_sample_a16_1d_nnan(
3823 ; CHECK-NEXT: [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3824 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3825 ; CHECK-NEXT: ret void
3827 %s32 = fpext half %s to float
3828 %res = call nnan <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3829 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold with several fast-math flags, sample.1d: %s is extended from half
; before a call that carries nnan ninf nsz. The expected output calls the
; .v4f32.f16 overload on %s directly and keeps all three flags.
3833 define amdgpu_kernel void @image_sample_a16_1d_nnan_ninf_nsz(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
3834 ; CHECK-LABEL: @image_sample_a16_1d_nnan_ninf_nsz(
3835 ; CHECK-NEXT: [[TMP1:%.*]] = call nnan ninf nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3836 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3837 ; CHECK-NEXT: ret void
3839 %s32 = fpext half %s to float
3840 %res = call nnan ninf nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3841 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold with the `fast` flag, sample.1d: %s is extended from half before a
; call marked fast. The expected output calls the .v4f32.f16 overload on %s
; directly and the rewritten call is still marked fast.
3845 define amdgpu_kernel void @image_sample_a16_1d_fast(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
3846 ; CHECK-LABEL: @image_sample_a16_1d_fast(
3847 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3848 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3849 ; CHECK-NEXT: ret void
3851 %s32 = fpext half %s to float
3852 %res = call fast <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3853 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold with fast-math flags, sample.2d: %s and %t are extended from half
; before a call that carries nnan. The expected output calls the .v4f32.f16
; overload on %s and %t directly and the rewritten call still carries nnan.
3857 define amdgpu_kernel void @image_sample_a16_2d_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
3858 ; CHECK-LABEL: @image_sample_a16_2d_nnan(
3859 ; CHECK-NEXT: [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3860 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3861 ; CHECK-NEXT: ret void
3863 %s32 = fpext half %s to float
3864 %t32 = fpext half %t to float
3865 %res = call nnan <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3866 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold with fast-math flags, sample.3d: %s, %t and %r are extended from
; half before a call that carries nnan. The expected output calls the
; .v4f32.f16 overload on the half values directly and still carries nnan.
3870 define amdgpu_kernel void @image_sample_a16_3d_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
3871 ; CHECK-LABEL: @image_sample_a16_3d_nnan(
3872 ; CHECK-NEXT: [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3873 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3874 ; CHECK-NEXT: ret void
3876 %s32 = fpext half %s to float
3877 %t32 = fpext half %t to float
3878 %r32 = fpext half %r to float
3879 %res = call nnan <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3880 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold with fast-math flags, sample.cube: %s, %t and %face are extended
; from half before a call that carries nnan. The expected output calls the
; .v4f32.f16 overload on the half values directly and still carries nnan.
3884 define amdgpu_kernel void @image_sample_a16_cube_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
3886 ; CHECK-LABEL: @image_sample_a16_cube_nnan(
3887 ; CHECK-NEXT: [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3888 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3889 ; CHECK-NEXT: ret void
3891 %s32 = fpext half %s to float
3892 %t32 = fpext half %t to float
3893 %face32 = fpext half %face to float
3894 %res = call nnan <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3895 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold with fast-math flags, sample.1darray: %s and %slice are extended
; from half before a call that carries nnan. The expected output calls the
; .v4f32.f16 overload on the half values directly and still carries nnan.
3899 define amdgpu_kernel void @image_sample_a16_1darray_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
3900 ; CHECK-LABEL: @image_sample_a16_1darray_nnan(
3901 ; CHECK-NEXT: [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3902 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3903 ; CHECK-NEXT: ret void
3905 %s32 = fpext half %s to float
3906 %slice32 = fpext half %slice to float
3907 %res = call nnan <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3908 store <4 x float> %res, <4 x float> addrspace(1)* %out
; a16 fold with fast-math flags, sample.2darray: %s, %t and %slice are extended
; from half before a call that carries nnan. The expected output calls the
; .v4f32.f16 overload on the half values directly and still carries nnan.
3912 define amdgpu_kernel void @image_sample_a16_2darray_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
3913 ; CHECK-LABEL: @image_sample_a16_2darray_nnan(
3914 ; CHECK-NEXT: [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
3915 ; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
3916 ; CHECK-NEXT: ret void
3918 %s32 = fpext half %s to float
3919 %t32 = fpext half %t to float
3920 %slice32 = fpext half %slice to float
3921 %res = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
3922 store <4 x float> %res, <4 x float> addrspace(1)* %out