llvm/test/CodeGen/AMDGPU/group-image-instructions.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
   2 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
   3
   4 define amdgpu_ps void @group_image_sample(i32 inreg noundef %globalTable, i32 inreg noundef %userdata6, i32 inreg noundef %userdata7, i32 inreg noundef %userdata8, i32 inreg noundef %PrimMask, <2 x float> noundef %PerspInterpSample, <2 x float> noundef %PerspInterpCenter, <2 x float> noundef %PerspInterpCentroid) #2 { ; 16 repeated s.buffer.load + image.sample groups summed channel-wise; the GFX11 checks below expect the 16 image_sample in two "s_clause 0x7" runs of eight.
   5 ; GFX11-LABEL: group_image_sample:
   6 ; GFX11:       ; %bb.0: ; %.entry
   7 ; GFX11-NEXT:    s_mov_b32 s33, exec_lo
   8 ; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
   9 ; GFX11-NEXT:    s_mov_b32 m0, s4
  10 ; GFX11-NEXT:    s_getpc_b64 s[4:5]
  11 ; GFX11-NEXT:    s_mov_b32 s0, s1
  12 ; GFX11-NEXT:    s_mov_b32 s6, s3
  13 ; GFX11-NEXT:    s_mov_b32 s1, s5
  14 ; GFX11-NEXT:    s_mov_b32 s3, s5
  15 ; GFX11-NEXT:    s_mov_b32 s7, s5
  16 ; GFX11-NEXT:    s_load_b128 s[12:15], s[0:1], 0x0
  17 ; GFX11-NEXT:    s_load_b128 s[8:11], s[2:3], 0x0
  18 ; GFX11-NEXT:    s_load_b256 s[0:7], s[6:7], 0x0
  19 ; GFX11-NEXT:    s_mov_b32 s16, exec_lo
  20 ; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
  21 ; GFX11-NEXT:    lds_param_load v2, attr0.y wait_vdst:15
  22 ; GFX11-NEXT:    lds_param_load v3, attr0.x wait_vdst:15
  23 ; GFX11-NEXT:    s_mov_b32 exec_lo, s16
  24 ; GFX11-NEXT:    v_interp_p10_f32 v4, v2, v0, v2 wait_exp:1
  25 ; GFX11-NEXT:    v_interp_p10_f32 v0, v3, v0, v3 wait_exp:0
  26 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
  27 ; GFX11-NEXT:    s_clause 0xf
  28 ; GFX11-NEXT:    s_buffer_load_b64 s[16:17], s[12:15], 0x10
  29 ; GFX11-NEXT:    s_buffer_load_b64 s[18:19], s[12:15], 0x20
  30 ; GFX11-NEXT:    s_buffer_load_b64 s[20:21], s[12:15], 0x30
  31 ; GFX11-NEXT:    s_buffer_load_b64 s[22:23], s[12:15], 0x40
  32 ; GFX11-NEXT:    s_buffer_load_b64 s[24:25], s[12:15], 0x50
  33 ; GFX11-NEXT:    s_buffer_load_b64 s[26:27], s[12:15], 0x60
  34 ; GFX11-NEXT:    s_buffer_load_b64 s[28:29], s[12:15], 0x70
  35 ; GFX11-NEXT:    s_buffer_load_b64 s[30:31], s[12:15], 0x80
  36 ; GFX11-NEXT:    s_buffer_load_b64 s[34:35], s[12:15], 0x90
  37 ; GFX11-NEXT:    s_buffer_load_b64 s[36:37], s[12:15], 0xa0
  38 ; GFX11-NEXT:    s_buffer_load_b64 s[38:39], s[12:15], 0xb0
  39 ; GFX11-NEXT:    s_buffer_load_b64 s[40:41], s[12:15], 0xc0
  40 ; GFX11-NEXT:    s_buffer_load_b64 s[42:43], s[12:15], 0xd0
  41 ; GFX11-NEXT:    s_buffer_load_b64 s[44:45], s[12:15], 0xe0
  42 ; GFX11-NEXT:    s_buffer_load_b64 s[46:47], s[12:15], 0xf0
  43 ; GFX11-NEXT:    s_buffer_load_b64 s[12:13], s[12:15], 0x100
  44 ; GFX11-NEXT:    v_interp_p2_f32 v36, v2, v1, v4 wait_exp:7
  45 ; GFX11-NEXT:    v_interp_p2_f32 v0, v3, v1, v0 wait_exp:7
  46 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
  47 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
  48 ; GFX11-NEXT:    v_add_f32_e32 v5, s17, v36
  49 ; GFX11-NEXT:    v_add_f32_e32 v4, s16, v0
  50 ; GFX11-NEXT:    v_add_f32_e32 v8, s18, v0
  51 ; GFX11-NEXT:    v_add_f32_e32 v9, s19, v36
  52 ; GFX11-NEXT:    v_add_f32_e32 v12, s20, v0
  53 ; GFX11-NEXT:    v_add_f32_e32 v13, s21, v36
  54 ; GFX11-NEXT:    v_add_f32_e32 v16, s22, v0
  55 ; GFX11-NEXT:    v_add_f32_e32 v17, s23, v36
  56 ; GFX11-NEXT:    v_add_f32_e32 v20, s24, v0
  57 ; GFX11-NEXT:    v_add_f32_e32 v21, s25, v36
  58 ; GFX11-NEXT:    v_add_f32_e32 v24, s26, v0
  59 ; GFX11-NEXT:    v_add_f32_e32 v25, s27, v36
  60 ; GFX11-NEXT:    v_add_f32_e32 v28, s28, v0
  61 ; GFX11-NEXT:    v_add_f32_e32 v29, s29, v36
  62 ; GFX11-NEXT:    v_add_f32_e32 v32, s30, v0
  63 ; GFX11-NEXT:    v_add_f32_e32 v33, s31, v36
  64 ; GFX11-NEXT:    s_clause 0x7
  65 ; GFX11-NEXT:    image_sample v[4:7], v[4:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
  66 ; GFX11-NEXT:    image_sample v[8:11], v[8:9], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
  67 ; GFX11-NEXT:    image_sample v[12:15], v[12:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
  68 ; GFX11-NEXT:    image_sample v[16:19], v[16:17], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
  69 ; GFX11-NEXT:    image_sample v[20:23], v[20:21], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
  70 ; GFX11-NEXT:    image_sample v[24:27], v[24:25], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
  71 ; GFX11-NEXT:    image_sample v[28:31], v[28:29], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
  72 ; GFX11-NEXT:    image_sample v[32:35], v[32:33], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
  73 ; GFX11-NEXT:    v_add_f32_e32 v37, s34, v0
  74 ; GFX11-NEXT:    v_add_f32_e32 v38, s35, v36
  75 ; GFX11-NEXT:    v_add_f32_e32 v40, s36, v0
  76 ; GFX11-NEXT:    v_add_f32_e32 v41, s37, v36
  77 ; GFX11-NEXT:    v_add_f32_e32 v44, s38, v0
  78 ; GFX11-NEXT:    v_add_f32_e32 v45, s39, v36
  79 ; GFX11-NEXT:    v_add_f32_e32 v48, s40, v0
  80 ; GFX11-NEXT:    v_add_f32_e32 v49, s41, v36
  81 ; GFX11-NEXT:    v_add_f32_e32 v52, s42, v0
  82 ; GFX11-NEXT:    v_add_f32_e32 v53, s43, v36
  83 ; GFX11-NEXT:    v_add_f32_e32 v56, s44, v0
  84 ; GFX11-NEXT:    v_add_f32_e32 v57, s45, v36
  85 ; GFX11-NEXT:    v_add_f32_e32 v60, s46, v0
  86 ; GFX11-NEXT:    v_add_f32_e32 v61, s47, v36
  87 ; GFX11-NEXT:    v_add_f32_e32 v0, s12, v0
  88 ; GFX11-NEXT:    v_add_f32_e32 v1, s13, v36
  89 ; GFX11-NEXT:    s_and_b32 exec_lo, exec_lo, s33
  90 ; GFX11-NEXT:    s_clause 0x7
  91 ; GFX11-NEXT:    image_sample v[36:39], v[37:38], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
  92 ; GFX11-NEXT:    image_sample v[40:43], v[40:41], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
  93 ; GFX11-NEXT:    image_sample v[44:47], v[44:45], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
  94 ; GFX11-NEXT:    image_sample v[48:51], v[48:49], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
  95 ; GFX11-NEXT:    image_sample v[52:55], v[52:53], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
  96 ; GFX11-NEXT:    image_sample v[56:59], v[56:57], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
  97 ; GFX11-NEXT:    image_sample v[60:63], v[60:61], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
  98 ; GFX11-NEXT:    image_sample v[64:67], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
  99 ; GFX11-NEXT:    s_waitcnt vmcnt(14)
 100 ; GFX11-NEXT:    v_dual_add_f32 v0, v8, v4 :: v_dual_add_f32 v1, v9, v5
 101 ; GFX11-NEXT:    v_dual_add_f32 v4, v10, v6 :: v_dual_add_f32 v5, v11, v7
 102 ; GFX11-NEXT:    s_waitcnt vmcnt(13)
 103 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 104 ; GFX11-NEXT:    v_dual_add_f32 v0, v12, v0 :: v_dual_add_f32 v1, v13, v1
 105 ; GFX11-NEXT:    v_dual_add_f32 v4, v14, v4 :: v_dual_add_f32 v5, v15, v5
 106 ; GFX11-NEXT:    s_waitcnt vmcnt(12)
 107 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 108 ; GFX11-NEXT:    v_dual_add_f32 v0, v16, v0 :: v_dual_add_f32 v1, v17, v1
 109 ; GFX11-NEXT:    v_dual_add_f32 v4, v18, v4 :: v_dual_add_f32 v5, v19, v5
 110 ; GFX11-NEXT:    s_waitcnt vmcnt(11)
 111 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 112 ; GFX11-NEXT:    v_dual_add_f32 v0, v20, v0 :: v_dual_add_f32 v1, v21, v1
 113 ; GFX11-NEXT:    v_dual_add_f32 v4, v22, v4 :: v_dual_add_f32 v5, v23, v5
 114 ; GFX11-NEXT:    s_waitcnt vmcnt(10)
 115 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 116 ; GFX11-NEXT:    v_dual_add_f32 v0, v24, v0 :: v_dual_add_f32 v1, v25, v1
 117 ; GFX11-NEXT:    v_dual_add_f32 v4, v26, v4 :: v_dual_add_f32 v5, v27, v5
 118 ; GFX11-NEXT:    s_waitcnt vmcnt(9)
 119 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 120 ; GFX11-NEXT:    v_dual_add_f32 v0, v28, v0 :: v_dual_add_f32 v1, v29, v1
 121 ; GFX11-NEXT:    v_dual_add_f32 v4, v30, v4 :: v_dual_add_f32 v5, v31, v5
 122 ; GFX11-NEXT:    s_waitcnt vmcnt(8)
 123 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 124 ; GFX11-NEXT:    v_dual_add_f32 v0, v32, v0 :: v_dual_add_f32 v1, v33, v1
 125 ; GFX11-NEXT:    v_dual_add_f32 v4, v34, v4 :: v_dual_add_f32 v5, v35, v5
 126 ; GFX11-NEXT:    s_waitcnt vmcnt(7)
 127 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 128 ; GFX11-NEXT:    v_dual_add_f32 v0, v36, v0 :: v_dual_add_f32 v1, v37, v1
 129 ; GFX11-NEXT:    v_dual_add_f32 v4, v38, v4 :: v_dual_add_f32 v5, v39, v5
 130 ; GFX11-NEXT:    s_waitcnt vmcnt(6)
 131 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 132 ; GFX11-NEXT:    v_dual_add_f32 v0, v40, v0 :: v_dual_add_f32 v1, v41, v1
 133 ; GFX11-NEXT:    v_dual_add_f32 v4, v42, v4 :: v_dual_add_f32 v5, v43, v5
 134 ; GFX11-NEXT:    s_waitcnt vmcnt(5)
 135 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 136 ; GFX11-NEXT:    v_dual_add_f32 v0, v44, v0 :: v_dual_add_f32 v1, v45, v1
 137 ; GFX11-NEXT:    v_dual_add_f32 v4, v46, v4 :: v_dual_add_f32 v5, v47, v5
 138 ; GFX11-NEXT:    s_waitcnt vmcnt(4)
 139 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 140 ; GFX11-NEXT:    v_dual_add_f32 v0, v48, v0 :: v_dual_add_f32 v1, v49, v1
 141 ; GFX11-NEXT:    v_dual_add_f32 v4, v50, v4 :: v_dual_add_f32 v5, v51, v5
 142 ; GFX11-NEXT:    s_waitcnt vmcnt(3)
 143 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 144 ; GFX11-NEXT:    v_dual_add_f32 v0, v52, v0 :: v_dual_add_f32 v1, v53, v1
 145 ; GFX11-NEXT:    v_dual_add_f32 v4, v54, v4 :: v_dual_add_f32 v5, v55, v5
 146 ; GFX11-NEXT:    s_waitcnt vmcnt(2)
 147 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 148 ; GFX11-NEXT:    v_dual_add_f32 v0, v56, v0 :: v_dual_add_f32 v1, v57, v1
 149 ; GFX11-NEXT:    v_dual_add_f32 v4, v58, v4 :: v_dual_add_f32 v5, v59, v5
 150 ; GFX11-NEXT:    s_waitcnt vmcnt(1)
 151 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 152 ; GFX11-NEXT:    v_dual_add_f32 v0, v60, v0 :: v_dual_add_f32 v1, v61, v1
 153 ; GFX11-NEXT:    v_dual_add_f32 v4, v62, v4 :: v_dual_add_f32 v5, v63, v5
 154 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 155 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 156 ; GFX11-NEXT:    v_dual_add_f32 v0, v64, v0 :: v_dual_add_f32 v1, v65, v1
 157 ; GFX11-NEXT:    v_dual_add_f32 v4, v66, v4 :: v_dual_add_f32 v5, v67, v5
 158 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 159 ; GFX11-NEXT:    v_cvt_pk_rtz_f16_f32_e32 v0, v0, v1
 160 ; GFX11-NEXT:    v_cvt_pk_rtz_f16_f32_e32 v1, v4, v5
 161 ; GFX11-NEXT:    exp mrt0 v0, v1, off, off done
 162 ; GFX11-NEXT:    s_endpgm
 163 .entry:
 164   %i = call i64 @llvm.amdgcn.s.getpc()
 165   %i1 = and i64 %i, -4294967296 ; keep only the upper 32 bits of the PC (mask 0xFFFFFFFF00000000)
 166   %i2 = zext i32 %userdata6 to i64
 167   %i3 = or disjoint i64 %i1, %i2 ; 64-bit address = PC upper half | %userdata6; same pattern for %userdata7 / %userdata8 below
 168   %i4 = inttoptr i64 %i3 to ptr addrspace(4)
 169   %i5 = load <4 x i32>, ptr addrspace(4) %i4, align 16 ; %i5: first operand of every s.buffer.load below
 170   %i6 = zext i32 %userdata7 to i64
 171   %i7 = or disjoint i64 %i1, %i6
 172   %i8 = inttoptr i64 %i7 to ptr addrspace(4)
 173   %i9 = load <4 x i32>, ptr addrspace(4) %i8, align 4, !invariant.load !0 ; %i9: <4 x i32> operand of every image.sample below
 174   %i10 = zext i32 %userdata8 to i64
 175   %i11 = or disjoint i64 %i1, %i10
 176   %i12 = inttoptr i64 %i11 to ptr addrspace(4)
 177   %i13 = load <8 x i32>, ptr addrspace(4) %i12, align 4, !invariant.load !0 ; %i13: <8 x i32> operand of every image.sample below
 178   %i14 = call float @llvm.amdgcn.lds.param.load(i32 1, i32 0, i32 %PrimMask) ; feeds %i16; checked above as "lds_param_load v2, attr0.y"
 179   %PerspInterpCenter.i1 = extractelement <2 x float> %PerspInterpCenter, i64 1
 180   %PerspInterpCenter.i0 = extractelement <2 x float> %PerspInterpCenter, i64 0
 181   %i15 = call float @llvm.amdgcn.interp.inreg.p10(float %i14, float %PerspInterpCenter.i0, float %i14)
 182   %i16 = call float @llvm.amdgcn.interp.inreg.p2(float %i14, float %PerspInterpCenter.i1, float %i15) ; %i16: added to element 1 of each buffer load
 183   %i17 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %PrimMask) ; feeds %i19; checked above as "lds_param_load v3, attr0.x"
 184   %i18 = call float @llvm.amdgcn.interp.inreg.p10(float %i17, float %PerspInterpCenter.i0, float %i17)
 185   %i19 = call float @llvm.amdgcn.interp.inreg.p2(float %i17, float %PerspInterpCenter.i1, float %i18) ; %i19: added to element 0 of each buffer load
 186   %i20 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 16, i32 0), !invariant.load !0 ; group 1 of 16: buffer offset 16; each later group steps the offset by 16, ending at 256
 187   %i21 = shufflevector <2 x i32> %i20, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 188   %i22 = bitcast <4 x i32> %i21 to <4 x float>
 189   %.i0 = extractelement <4 x float> %i22, i64 0
 190   %.i1 = extractelement <4 x float> %i22, i64 1
 191   %.i03 = fadd reassoc nnan nsz arcp contract afn float %.i0, %i19
 192   %.i14 = fadd reassoc nnan nsz arcp contract afn float %.i1, %i16
 193   %i23 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i03, float %.i14, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0) ; first sample: its four channels seed the running sums
 194   %.i010 = extractelement <4 x float> %i23, i64 0
 195   %.i113 = extractelement <4 x float> %i23, i64 1
 196   %.i215 = extractelement <4 x float> %i23, i64 2
 197   %.i317 = extractelement <4 x float> %i23, i64 3
 198   %i24 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 32, i32 0), !invariant.load !0
 199   %i25 = shufflevector <2 x i32> %i24, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 200   %i26 = bitcast <4 x i32> %i25 to <4 x float>
 201   %.i05 = extractelement <4 x float> %i26, i64 0
 202   %.i16 = extractelement <4 x float> %i26, i64 1
 203   %.i07 = fadd reassoc nnan nsz arcp contract afn float %.i05, %i19
 204   %.i18 = fadd reassoc nnan nsz arcp contract afn float %.i16, %i16
 205   %i27 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i07, float %.i18, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0) ; this and every later sample is added channel-wise into the running sums
 206   %.i09 = extractelement <4 x float> %i27, i64 0
 207   %.i011 = fadd reassoc nnan nsz arcp contract afn float %.i09, %.i010
 208   %.i112 = extractelement <4 x float> %i27, i64 1
 209   %.i114 = fadd reassoc nnan nsz arcp contract afn float %.i112, %.i113
 210   %.i2 = extractelement <4 x float> %i27, i64 2
 211   %.i216 = fadd reassoc nnan nsz arcp contract afn float %.i2, %.i215
 212   %.i3 = extractelement <4 x float> %i27, i64 3
 213   %.i318 = fadd reassoc nnan nsz arcp contract afn float %.i3, %.i317
 214   %i28 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 48, i32 0), !invariant.load !0
 215   %i29 = shufflevector <2 x i32> %i28, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 216   %i30 = bitcast <4 x i32> %i29 to <4 x float>
 217   %.i019 = extractelement <4 x float> %i30, i64 0
 218   %.i120 = extractelement <4 x float> %i30, i64 1
 219   %.i021 = fadd reassoc nnan nsz arcp contract afn float %.i019, %i19
 220   %.i122 = fadd reassoc nnan nsz arcp contract afn float %.i120, %i16
 221   %i31 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i021, float %.i122, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
 222   %.i023 = extractelement <4 x float> %i31, i64 0
 223   %.i024 = fadd reassoc nnan nsz arcp contract afn float %.i023, %.i011
 224   %.i125 = extractelement <4 x float> %i31, i64 1
 225   %.i126 = fadd reassoc nnan nsz arcp contract afn float %.i125, %.i114
 226   %.i227 = extractelement <4 x float> %i31, i64 2
 227   %.i228 = fadd reassoc nnan nsz arcp contract afn float %.i227, %.i216
 228   %.i329 = extractelement <4 x float> %i31, i64 3
 229   %.i330 = fadd reassoc nnan nsz arcp contract afn float %.i329, %.i318
 230   %i32 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 64, i32 0), !invariant.load !0
 231   %i33 = shufflevector <2 x i32> %i32, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 232   %i34 = bitcast <4 x i32> %i33 to <4 x float>
 233   %.i031 = extractelement <4 x float> %i34, i64 0
 234   %.i132 = extractelement <4 x float> %i34, i64 1
 235   %.i033 = fadd reassoc nnan nsz arcp contract afn float %.i031, %i19
 236   %.i134 = fadd reassoc nnan nsz arcp contract afn float %.i132, %i16
 237   %i35 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i033, float %.i134, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
 238   %.i035 = extractelement <4 x float> %i35, i64 0
 239   %.i036 = fadd reassoc nnan nsz arcp contract afn float %.i035, %.i024
 240   %.i137 = extractelement <4 x float> %i35, i64 1
 241   %.i138 = fadd reassoc nnan nsz arcp contract afn float %.i137, %.i126
 242   %.i239 = extractelement <4 x float> %i35, i64 2
 243   %.i240 = fadd reassoc nnan nsz arcp contract afn float %.i239, %.i228
 244   %.i341 = extractelement <4 x float> %i35, i64 3
 245   %.i342 = fadd reassoc nnan nsz arcp contract afn float %.i341, %.i330
 246   %i36 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 80, i32 0), !invariant.load !0
 247   %i37 = shufflevector <2 x i32> %i36, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 248   %i38 = bitcast <4 x i32> %i37 to <4 x float>
 249   %.i043 = extractelement <4 x float> %i38, i64 0
 250   %.i144 = extractelement <4 x float> %i38, i64 1
 251   %.i045 = fadd reassoc nnan nsz arcp contract afn float %.i043, %i19
 252   %.i146 = fadd reassoc nnan nsz arcp contract afn float %.i144, %i16
 253   %i39 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i045, float %.i146, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
 254   %.i047 = extractelement <4 x float> %i39, i64 0
 255   %.i048 = fadd reassoc nnan nsz arcp contract afn float %.i047, %.i036
 256   %.i149 = extractelement <4 x float> %i39, i64 1
 257   %.i150 = fadd reassoc nnan nsz arcp contract afn float %.i149, %.i138
 258   %.i251 = extractelement <4 x float> %i39, i64 2
 259   %.i252 = fadd reassoc nnan nsz arcp contract afn float %.i251, %.i240
 260   %.i353 = extractelement <4 x float> %i39, i64 3
 261   %.i354 = fadd reassoc nnan nsz arcp contract afn float %.i353, %.i342
 262   %i40 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 96, i32 0), !invariant.load !0
 263   %i41 = shufflevector <2 x i32> %i40, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 264   %i42 = bitcast <4 x i32> %i41 to <4 x float>
 265   %.i055 = extractelement <4 x float> %i42, i64 0
 266   %.i156 = extractelement <4 x float> %i42, i64 1
 267   %.i057 = fadd reassoc nnan nsz arcp contract afn float %.i055, %i19
 268   %.i158 = fadd reassoc nnan nsz arcp contract afn float %.i156, %i16
 269   %i43 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i057, float %.i158, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
 270   %.i059 = extractelement <4 x float> %i43, i64 0
 271   %.i060 = fadd reassoc nnan nsz arcp contract afn float %.i059, %.i048
 272   %.i161 = extractelement <4 x float> %i43, i64 1
 273   %.i162 = fadd reassoc nnan nsz arcp contract afn float %.i161, %.i150
 274   %.i263 = extractelement <4 x float> %i43, i64 2
 275   %.i264 = fadd reassoc nnan nsz arcp contract afn float %.i263, %.i252
 276   %.i365 = extractelement <4 x float> %i43, i64 3
 277   %.i366 = fadd reassoc nnan nsz arcp contract afn float %.i365, %.i354
 278   %i44 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 112, i32 0), !invariant.load !0
 279   %i45 = shufflevector <2 x i32> %i44, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 280   %i46 = bitcast <4 x i32> %i45 to <4 x float>
 281   %.i067 = extractelement <4 x float> %i46, i64 0
 282   %.i168 = extractelement <4 x float> %i46, i64 1
 283   %.i069 = fadd reassoc nnan nsz arcp contract afn float %.i067, %i19
 284   %.i170 = fadd reassoc nnan nsz arcp contract afn float %.i168, %i16
 285   %i47 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i069, float %.i170, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
 286   %.i071 = extractelement <4 x float> %i47, i64 0
 287   %.i072 = fadd reassoc nnan nsz arcp contract afn float %.i071, %.i060
 288   %.i173 = extractelement <4 x float> %i47, i64 1
 289   %.i174 = fadd reassoc nnan nsz arcp contract afn float %.i173, %.i162
 290   %.i275 = extractelement <4 x float> %i47, i64 2
 291   %.i276 = fadd reassoc nnan nsz arcp contract afn float %.i275, %.i264
 292   %.i377 = extractelement <4 x float> %i47, i64 3
 293   %.i378 = fadd reassoc nnan nsz arcp contract afn float %.i377, %.i366
 294   %i48 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 128, i32 0), !invariant.load !0
 295   %i49 = shufflevector <2 x i32> %i48, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 296   %i50 = bitcast <4 x i32> %i49 to <4 x float>
 297   %.i079 = extractelement <4 x float> %i50, i64 0
 298   %.i180 = extractelement <4 x float> %i50, i64 1
 299   %.i081 = fadd reassoc nnan nsz arcp contract afn float %.i079, %i19
 300   %.i182 = fadd reassoc nnan nsz arcp contract afn float %.i180, %i16
 301   %i51 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i081, float %.i182, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
 302   %.i083 = extractelement <4 x float> %i51, i64 0
 303   %.i084 = fadd reassoc nnan nsz arcp contract afn float %.i083, %.i072
 304   %.i185 = extractelement <4 x float> %i51, i64 1
 305   %.i186 = fadd reassoc nnan nsz arcp contract afn float %.i185, %.i174
 306   %.i287 = extractelement <4 x float> %i51, i64 2
 307   %.i288 = fadd reassoc nnan nsz arcp contract afn float %.i287, %.i276
 308   %.i389 = extractelement <4 x float> %i51, i64 3
 309   %.i390 = fadd reassoc nnan nsz arcp contract afn float %.i389, %.i378
 310   %i52 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 144, i32 0), !invariant.load !0
 311   %i53 = shufflevector <2 x i32> %i52, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 312   %i54 = bitcast <4 x i32> %i53 to <4 x float>
 313   %.i091 = extractelement <4 x float> %i54, i64 0
 314   %.i192 = extractelement <4 x float> %i54, i64 1
 315   %.i093 = fadd reassoc nnan nsz arcp contract afn float %.i091, %i19
 316   %.i194 = fadd reassoc nnan nsz arcp contract afn float %.i192, %i16
 317   %i55 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i093, float %.i194, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
 318   %.i095 = extractelement <4 x float> %i55, i64 0
 319   %.i096 = fadd reassoc nnan nsz arcp contract afn float %.i095, %.i084
 320   %.i197 = extractelement <4 x float> %i55, i64 1
 321   %.i198 = fadd reassoc nnan nsz arcp contract afn float %.i197, %.i186
 322   %.i299 = extractelement <4 x float> %i55, i64 2
 323   %.i2100 = fadd reassoc nnan nsz arcp contract afn float %.i299, %.i288
 324   %.i3101 = extractelement <4 x float> %i55, i64 3
 325   %.i3102 = fadd reassoc nnan nsz arcp contract afn float %.i3101, %.i390
 326   %i56 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 160, i32 0), !invariant.load !0
 327   %i57 = shufflevector <2 x i32> %i56, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 328   %i58 = bitcast <4 x i32> %i57 to <4 x float>
 329   %.i0103 = extractelement <4 x float> %i58, i64 0
 330   %.i1104 = extractelement <4 x float> %i58, i64 1
 331   %.i0105 = fadd reassoc nnan nsz arcp contract afn float %.i0103, %i19
 332   %.i1106 = fadd reassoc nnan nsz arcp contract afn float %.i1104, %i16
 333   %i59 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i0105, float %.i1106, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
 334   %.i0107 = extractelement <4 x float> %i59, i64 0
 335   %.i0108 = fadd reassoc nnan nsz arcp contract afn float %.i0107, %.i096
 336   %.i1109 = extractelement <4 x float> %i59, i64 1
 337   %.i1110 = fadd reassoc nnan nsz arcp contract afn float %.i1109, %.i198
 338   %.i2111 = extractelement <4 x float> %i59, i64 2
 339   %.i2112 = fadd reassoc nnan nsz arcp contract afn float %.i2111, %.i2100
 340   %.i3113 = extractelement <4 x float> %i59, i64 3
 341   %.i3114 = fadd reassoc nnan nsz arcp contract afn float %.i3113, %.i3102
 342   %i60 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 176, i32 0), !invariant.load !0
 343   %i61 = shufflevector <2 x i32> %i60, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 344   %i62 = bitcast <4 x i32> %i61 to <4 x float>
 345   %.i0115 = extractelement <4 x float> %i62, i64 0
 346   %.i1116 = extractelement <4 x float> %i62, i64 1
 347   %.i0117 = fadd reassoc nnan nsz arcp contract afn float %.i0115, %i19
 348   %.i1118 = fadd reassoc nnan nsz arcp contract afn float %.i1116, %i16
 349   %i63 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i0117, float %.i1118, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
 350   %.i0119 = extractelement <4 x float> %i63, i64 0
 351   %.i0120 = fadd reassoc nnan nsz arcp contract afn float %.i0119, %.i0108
 352   %.i1121 = extractelement <4 x float> %i63, i64 1
 353   %.i1122 = fadd reassoc nnan nsz arcp contract afn float %.i1121, %.i1110
 354   %.i2123 = extractelement <4 x float> %i63, i64 2
 355   %.i2124 = fadd reassoc nnan nsz arcp contract afn float %.i2123, %.i2112
 356   %.i3125 = extractelement <4 x float> %i63, i64 3
 357   %.i3126 = fadd reassoc nnan nsz arcp contract afn float %.i3125, %.i3114
 358   %i64 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 192, i32 0), !invariant.load !0
 359   %i65 = shufflevector <2 x i32> %i64, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 360   %i66 = bitcast <4 x i32> %i65 to <4 x float>
 361   %.i0127 = extractelement <4 x float> %i66, i64 0
 362   %.i1128 = extractelement <4 x float> %i66, i64 1
 363   %.i0129 = fadd reassoc nnan nsz arcp contract afn float %.i0127, %i19
 364   %.i1130 = fadd reassoc nnan nsz arcp contract afn float %.i1128, %i16
 365   %i67 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i0129, float %.i1130, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
 366   %.i0131 = extractelement <4 x float> %i67, i64 0
 367   %.i0132 = fadd reassoc nnan nsz arcp contract afn float %.i0131, %.i0120
 368   %.i1133 = extractelement <4 x float> %i67, i64 1
 369   %.i1134 = fadd reassoc nnan nsz arcp contract afn float %.i1133, %.i1122
 370   %.i2135 = extractelement <4 x float> %i67, i64 2
 371   %.i2136 = fadd reassoc nnan nsz arcp contract afn float %.i2135, %.i2124
 372   %.i3137 = extractelement <4 x float> %i67, i64 3
 373   %.i3138 = fadd reassoc nnan nsz arcp contract afn float %.i3137, %.i3126
 374   %i68 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 208, i32 0), !invariant.load !0
 375   %i69 = shufflevector <2 x i32> %i68, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 376   %i70 = bitcast <4 x i32> %i69 to <4 x float>
 377   %.i0139 = extractelement <4 x float> %i70, i64 0
 378   %.i1140 = extractelement <4 x float> %i70, i64 1
 379   %.i0141 = fadd reassoc nnan nsz arcp contract afn float %.i0139, %i19
 380   %.i1142 = fadd reassoc nnan nsz arcp contract afn float %.i1140, %i16
 381   %i71 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i0141, float %.i1142, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
 382   %.i0143 = extractelement <4 x float> %i71, i64 0
 383   %.i0144 = fadd reassoc nnan nsz arcp contract afn float %.i0143, %.i0132
 384   %.i1145 = extractelement <4 x float> %i71, i64 1
 385   %.i1146 = fadd reassoc nnan nsz arcp contract afn float %.i1145, %.i1134
 386   %.i2147 = extractelement <4 x float> %i71, i64 2
 387   %.i2148 = fadd reassoc nnan nsz arcp contract afn float %.i2147, %.i2136
 388   %.i3149 = extractelement <4 x float> %i71, i64 3
 389   %.i3150 = fadd reassoc nnan nsz arcp contract afn float %.i3149, %.i3138
 390   %i72 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 224, i32 0), !invariant.load !0
 391   %i73 = shufflevector <2 x i32> %i72, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 392   %i74 = bitcast <4 x i32> %i73 to <4 x float>
 393   %.i0151 = extractelement <4 x float> %i74, i64 0
 394   %.i1152 = extractelement <4 x float> %i74, i64 1
 395   %.i0153 = fadd reassoc nnan nsz arcp contract afn float %.i0151, %i19
 396   %.i1154 = fadd reassoc nnan nsz arcp contract afn float %.i1152, %i16
 397   %i75 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i0153, float %.i1154, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
 398   %.i0155 = extractelement <4 x float> %i75, i64 0
 399   %.i0156 = fadd reassoc nnan nsz arcp contract afn float %.i0155, %.i0144
 400   %.i1157 = extractelement <4 x float> %i75, i64 1
 401   %.i1158 = fadd reassoc nnan nsz arcp contract afn float %.i1157, %.i1146
 402   %.i2159 = extractelement <4 x float> %i75, i64 2
 403   %.i2160 = fadd reassoc nnan nsz arcp contract afn float %.i2159, %.i2148
 404   %.i3161 = extractelement <4 x float> %i75, i64 3
 405   %.i3162 = fadd reassoc nnan nsz arcp contract afn float %.i3161, %.i3150
 406   %i76 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 240, i32 0), !invariant.load !0
 407   %i77 = shufflevector <2 x i32> %i76, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 408   %i78 = bitcast <4 x i32> %i77 to <4 x float>
 409   %.i0163 = extractelement <4 x float> %i78, i64 0
 410   %.i1164 = extractelement <4 x float> %i78, i64 1
 411   %.i0165 = fadd reassoc nnan nsz arcp contract afn float %.i0163, %i19
 412   %.i1166 = fadd reassoc nnan nsz arcp contract afn float %.i1164, %i16
 413   %i79 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i0165, float %.i1166, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
 414   %.i0167 = extractelement <4 x float> %i79, i64 0
 415   %.i0168 = fadd reassoc nnan nsz arcp contract afn float %.i0167, %.i0156
 416   %.i1169 = extractelement <4 x float> %i79, i64 1
 417   %.i1170 = fadd reassoc nnan nsz arcp contract afn float %.i1169, %.i1158
 418   %.i2171 = extractelement <4 x float> %i79, i64 2
 419   %.i2172 = fadd reassoc nnan nsz arcp contract afn float %.i2171, %.i2160
 420   %.i3173 = extractelement <4 x float> %i79, i64 3
 421   %.i3174 = fadd reassoc nnan nsz arcp contract afn float %.i3173, %.i3162
 422   %i80 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 256, i32 0), !invariant.load !0
 423   %i81 = shufflevector <2 x i32> %i80, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 424   %i82 = bitcast <4 x i32> %i81 to <4 x float>
 425   %.i0175 = extractelement <4 x float> %i82, i64 0
 426   %.i1176 = extractelement <4 x float> %i82, i64 1
 427   %.i0177 = fadd reassoc nnan nsz arcp contract afn float %.i0175, %i19
 428   %.i1178 = fadd reassoc nnan nsz arcp contract afn float %.i1176, %i16
 429   %i83 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i0177, float %.i1178, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
 430   %.i0179 = extractelement <4 x float> %i83, i64 0
 431   %.i0180 = fadd reassoc nnan nsz arcp contract afn float %.i0179, %.i0168
 432   %.i1181 = extractelement <4 x float> %i83, i64 1
 433   %.i1182 = fadd reassoc nnan nsz arcp contract afn float %.i1181, %.i1170
 434   %.i2183 = extractelement <4 x float> %i83, i64 2
 435   %.i2184 = fadd reassoc nnan nsz arcp contract afn float %.i2183, %.i2172
 436   %.i3185 = extractelement <4 x float> %i83, i64 3
 437   %.i3186 = fadd reassoc nnan nsz arcp contract afn float %.i3185, %.i3174
 438   %i84 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %.i0180, float %.i1182) ; pack the final channel 0 / channel 1 sums into <2 x half>
 439   %i85 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %.i2184, float %.i3186) ; pack the final channel 2 / channel 3 sums into <2 x half>
 440   %i86 = bitcast <2 x half> %i84 to float
 441   %i87 = bitcast <2 x half> %i85 to float
 442   call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float %i86, float %i87, float poison, float poison, i1 true, i1 true) ; export both packed pairs; checked above as "exp mrt0 v0, v1, off, off done"
 443   ret void
 444 }
 445
 446 declare noundef i64 @llvm.amdgcn.s.getpc() #3
 447 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #5 ; overload suffix spelled out so the declaration names the same function as the 16 call sites in @group_image_sample
 448 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #3
 449 declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #4
 450 declare float @llvm.amdgcn.lds.param.load(i32 immarg, i32 immarg, i32) #3
 451 declare float @llvm.amdgcn.interp.inreg.p10(float, float, float) #3
 452 declare float @llvm.amdgcn.interp.inreg.p2(float, float, float) #3
 453 declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg) #8
 454
 455 attributes #2 = { alwaysinline nounwind memory(readwrite) "amdgpu-sched-strategy"="max-memory-clause" "amdgpu-max-memory-cluster-dwords"="32"} ; applied to @group_image_sample
 456 attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; used by the s.getpc, cvt.pkrtz, lds.param.load and interp.inreg.p10/p2 declarations
 457 attributes #4 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } ; used by the exp.f32 declaration
 458 attributes #5 = { nocallback nofree nosync nounwind willreturn memory(read) } ; used by the image.sample declaration
 459 attributes #8 = { nocallback nofree nosync nounwind willreturn memory(none) } ; used by the s.buffer.load declaration
 460
 461 !0 = !{} ; empty metadata node attached as !invariant.load on loads in @group_image_sample