llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
   3 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
   4 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
   5
   6 define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
     ; Plain 2D gather4: coordinates %s and %t are half, first intrinsic operand is i32 1.
     ; Expected output on every run line: v0/v1 merged into a single register via
     ; v_and_b32 + v_lshl_or_b32 between an s_wqm / s_and exec pair, then image_gather4
     ; with a one-register address (v0), dmask:0x1 and the a16 modifier.
   7 ; GFX9-LABEL: gather4_2d:
   8 ; GFX9:       ; %bb.0: ; %main_body
   9 ; GFX9-NEXT:    s_mov_b64 s[12:13], exec
  10 ; GFX9-NEXT:    s_wqm_b64 exec, exec
  11 ; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
  12 ; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
  13 ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
  14 ; GFX9-NEXT:    image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
  15 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
  16 ; GFX9-NEXT:    ; return to shader part epilog
  17 ;
  18 ; GFX10-LABEL: gather4_2d:
  19 ; GFX10:       ; %bb.0: ; %main_body
  20 ; GFX10-NEXT:    s_mov_b32 s12, exec_lo
  21 ; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
  22 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
  23 ; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
  24 ; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
  25 ; GFX10-NEXT:    image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
  26 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
  27 ; GFX10-NEXT:    ; return to shader part epilog
  28 main_body:
  29   %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  30   ret <4 x float> %v
  31 }
  32
  33 define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
     ; Cube gather4 with three half operands (%s, %t, %face).
     ; The checks expect v0/v1 merged into v1 and a two-register address v[1:2]; the
     ; gfx900 run adds the da modifier, the other runs print dim:SQ_RSRC_IMG_CUBE.
  34 ; GFX9-LABEL: gather4_cube:
  35 ; GFX9:       ; %bb.0: ; %main_body
  36 ; GFX9-NEXT:    s_mov_b64 s[12:13], exec
  37 ; GFX9-NEXT:    s_wqm_b64 exec, exec
  38 ; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
  39 ; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
  40 ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
  41 ; GFX9-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
  42 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
  43 ; GFX9-NEXT:    ; return to shader part epilog
  44 ;
  45 ; GFX10-LABEL: gather4_cube:
  46 ; GFX10:       ; %bb.0: ; %main_body
  47 ; GFX10-NEXT:    s_mov_b32 s12, exec_lo
  48 ; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
  49 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
  50 ; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
  51 ; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
  52 ; GFX10-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE a16
  53 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
  54 ; GFX10-NEXT:    ; return to shader part epilog
  55 main_body:
  56   %v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 1, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  57   ret <4 x float> %v
  58 }
  59
  60 define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
     ; 2D-array gather4 with three half operands (%s, %t, %slice).
     ; The checks expect v0/v1 merged into v1 and a two-register address v[1:2]; the
     ; gfx900 run adds the da modifier, the other runs print dim:SQ_RSRC_IMG_2D_ARRAY.
  61 ; GFX9-LABEL: gather4_2darray:
  62 ; GFX9:       ; %bb.0: ; %main_body
  63 ; GFX9-NEXT:    s_mov_b64 s[12:13], exec
  64 ; GFX9-NEXT:    s_wqm_b64 exec, exec
  65 ; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
  66 ; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
  67 ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
  68 ; GFX9-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
  69 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
  70 ; GFX9-NEXT:    ; return to shader part epilog
  71 ;
  72 ; GFX10-LABEL: gather4_2darray:
  73 ; GFX10:       ; %bb.0: ; %main_body
  74 ; GFX10-NEXT:    s_mov_b32 s12, exec_lo
  75 ; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
  76 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
  77 ; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
  78 ; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
  79 ; GFX10-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY a16
  80 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
  81 ; GFX10-NEXT:    ; return to shader part epilog
  82 main_body:
  83   %v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 1, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  84   ret <4 x float> %v
  85 }
  86
  87 define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
     ; gather4_c: a float %zcompare precedes the half coordinates (%s, %t).
     ; The checks expect v0 to be used as-is and v1/v2 merged into v1, giving the
     ; two-register address v[0:1] on an a16 image_gather4_c.
  88 ; GFX9-LABEL: gather4_c_2d:
  89 ; GFX9:       ; %bb.0: ; %main_body
  90 ; GFX9-NEXT:    s_mov_b64 s[12:13], exec
  91 ; GFX9-NEXT:    s_wqm_b64 exec, exec
  92 ; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
  93 ; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
  94 ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
  95 ; GFX9-NEXT:    image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
  96 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
  97 ; GFX9-NEXT:    ; return to shader part epilog
  98 ;
  99 ; GFX10-LABEL: gather4_c_2d:
 100 ; GFX10:       ; %bb.0: ; %main_body
 101 ; GFX10-NEXT:    s_mov_b32 s12, exec_lo
 102 ; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
 103 ; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 104 ; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
 105 ; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
 106 ; GFX10-NEXT:    image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 107 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 108 ; GFX10-NEXT:    ; return to shader part epilog
 109 main_body:
 110   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 111   ret <4 x float> %v
 112 }
 113
 114 define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
     ; gather4_cl: half coordinates (%s, %t) followed by a half %clamp.
     ; The checks expect v0/v1 merged into v1 and the two-register address v[1:2]
     ; on an a16 image_gather4_cl.
 115 ; GFX9-LABEL: gather4_cl_2d:
 116 ; GFX9:       ; %bb.0: ; %main_body
 117 ; GFX9-NEXT:    s_mov_b64 s[12:13], exec
 118 ; GFX9-NEXT:    s_wqm_b64 exec, exec
 119 ; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 120 ; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
 121 ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
 122 ; GFX9-NEXT:    image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
 123 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 124 ; GFX9-NEXT:    ; return to shader part epilog
 125 ;
 126 ; GFX10-LABEL: gather4_cl_2d:
 127 ; GFX10:       ; %bb.0: ; %main_body
 128 ; GFX10-NEXT:    s_mov_b32 s12, exec_lo
 129 ; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
 130 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 131 ; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
 132 ; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
 133 ; GFX10-NEXT:    image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 134 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 135 ; GFX10-NEXT:    ; return to shader part epilog
 136 main_body:
 137   %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 1, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 138   ret <4 x float> %v
 139 }
 140
 141 define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
     ; gather4_c_cl: float %zcompare, half %s/%t, then half %clamp.
     ; On the gfx900 run the checks expect v_mov_b32 copies so the address forms the
     ; contiguous range v[3:5]; the gfx1010/gfx1100 runs instead expect the bracketed
     ; register list [v0, v1, v3] with no copies.
 142 ; GFX9-LABEL: gather4_c_cl_2d:
 143 ; GFX9:       ; %bb.0: ; %main_body
 144 ; GFX9-NEXT:    s_mov_b64 s[12:13], exec
 145 ; GFX9-NEXT:    s_wqm_b64 exec, exec
 146 ; GFX9-NEXT:    v_mov_b32_e32 v5, v3
 147 ; GFX9-NEXT:    v_mov_b32_e32 v3, v0
 148 ; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
 149 ; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
 150 ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
 151 ; GFX9-NEXT:    image_gather4_c_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
 152 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 153 ; GFX9-NEXT:    ; return to shader part epilog
 154 ;
 155 ; GFX10-LABEL: gather4_c_cl_2d:
 156 ; GFX10:       ; %bb.0: ; %main_body
 157 ; GFX10-NEXT:    s_mov_b32 s12, exec_lo
 158 ; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
 159 ; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 160 ; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
 161 ; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
 162 ; GFX10-NEXT:    image_gather4_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 163 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 164 ; GFX10-NEXT:    ; return to shader part epilog
 165 main_body:
 166   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 167   ret <4 x float> %v
 168 }
 169
 170 define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) {
     ; gather4_b: a half %bias precedes the half coordinates (%s, %t).
     ; The checks expect v0 to be used as-is and v1/v2 merged into v1, giving the
     ; two-register address v[0:1] on an a16 image_gather4_b.
 171 ; GFX9-LABEL: gather4_b_2d:
 172 ; GFX9:       ; %bb.0: ; %main_body
 173 ; GFX9-NEXT:    s_mov_b64 s[12:13], exec
 174 ; GFX9-NEXT:    s_wqm_b64 exec, exec
 175 ; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 176 ; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
 177 ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
 178 ; GFX9-NEXT:    image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
 179 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 180 ; GFX9-NEXT:    ; return to shader part epilog
 181 ;
 182 ; GFX10-LABEL: gather4_b_2d:
 183 ; GFX10:       ; %bb.0: ; %main_body
 184 ; GFX10-NEXT:    s_mov_b32 s12, exec_lo
 185 ; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
 186 ; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 187 ; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
 188 ; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
 189 ; GFX10-NEXT:    image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 190 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 191 ; GFX10-NEXT:    ; return to shader part epilog
 192 main_body:
 193   %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32 1, half %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 194   ret <4 x float> %v
 195 }
 196
 197 define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) {
     ; gather4_c_b: half %bias, float %zcompare, then the half coordinates (%s, %t).
     ; The checks expect v0 and v1 to be used as-is and v2/v3 merged into v2, giving
     ; the three-register address v[0:2] on an a16 image_gather4_c_b.
 198 ; GFX9-LABEL: gather4_c_b_2d:
 199 ; GFX9:       ; %bb.0: ; %main_body
 200 ; GFX9-NEXT:    s_mov_b64 s[12:13], exec
 201 ; GFX9-NEXT:    s_wqm_b64 exec, exec
 202 ; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 203 ; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
 204 ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
 205 ; GFX9-NEXT:    image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16
 206 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 207 ; GFX9-NEXT:    ; return to shader part epilog
 208 ;
 209 ; GFX10-LABEL: gather4_c_b_2d:
 210 ; GFX10:       ; %bb.0: ; %main_body
 211 ; GFX10-NEXT:    s_mov_b32 s12, exec_lo
 212 ; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
 213 ; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 214 ; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
 215 ; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
 216 ; GFX10-NEXT:    image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 217 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 218 ; GFX10-NEXT:    ; return to shader part epilog
 219 main_body:
 220   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32 1, half %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 221   ret <4 x float> %v
 222 }
 223
 224 define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) {
     ; gather4_b_cl: half %bias, half %s/%t, then half %clamp.
     ; On the gfx900 run the checks expect v_mov_b32 copies so the address forms the
     ; contiguous range v[3:5]; the gfx1010/gfx1100 runs instead expect the bracketed
     ; register list [v0, v1, v3] with no copies.
 225 ; GFX9-LABEL: gather4_b_cl_2d:
 226 ; GFX9:       ; %bb.0: ; %main_body
 227 ; GFX9-NEXT:    s_mov_b64 s[12:13], exec
 228 ; GFX9-NEXT:    s_wqm_b64 exec, exec
 229 ; GFX9-NEXT:    v_mov_b32_e32 v5, v3
 230 ; GFX9-NEXT:    v_mov_b32_e32 v3, v0
 231 ; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
 232 ; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
 233 ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
 234 ; GFX9-NEXT:    image_gather4_b_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
 235 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 236 ; GFX9-NEXT:    ; return to shader part epilog
 237 ;
 238 ; GFX10-LABEL: gather4_b_cl_2d:
 239 ; GFX10:       ; %bb.0: ; %main_body
 240 ; GFX10-NEXT:    s_mov_b32 s12, exec_lo
 241 ; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
 242 ; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 243 ; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
 244 ; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
 245 ; GFX10-NEXT:    image_gather4_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 246 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 247 ; GFX10-NEXT:    ; return to shader part epilog
 248 main_body:
 249   %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32 1, half %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 250   ret <4 x float> %v
 251 }
 252
 253 define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) {
     ; gather4_c_b_cl: half %bias, float %zcompare, half %s/%t, then half %clamp.
     ; On the gfx900 run the checks expect three v_mov_b32 copies so the address forms
     ; the contiguous range v[4:7]; the gfx1010/gfx1100 runs instead expect the
     ; bracketed register list [v0, v1, v2, v4] with no copies.
 254 ; GFX9-LABEL: gather4_c_b_cl_2d:
 255 ; GFX9:       ; %bb.0: ; %main_body
 256 ; GFX9-NEXT:    s_mov_b64 s[12:13], exec
 257 ; GFX9-NEXT:    s_wqm_b64 exec, exec
 258 ; GFX9-NEXT:    v_mov_b32_e32 v7, v4
 259 ; GFX9-NEXT:    v_mov_b32_e32 v4, v0
 260 ; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v2
 261 ; GFX9-NEXT:    v_mov_b32_e32 v5, v1
 262 ; GFX9-NEXT:    v_lshl_or_b32 v6, v3, 16, v0
 263 ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
 264 ; GFX9-NEXT:    image_gather4_c_b_cl v[0:3], v[4:7], s[0:7], s[8:11] dmask:0x1 a16
 265 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 266 ; GFX9-NEXT:    ; return to shader part epilog
 267 ;
 268 ; GFX10-LABEL: gather4_c_b_cl_2d:
 269 ; GFX10:       ; %bb.0: ; %main_body
 270 ; GFX10-NEXT:    s_mov_b32 s12, exec_lo
 271 ; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
 272 ; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 273 ; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
 274 ; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
 275 ; GFX10-NEXT:    image_gather4_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 276 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 277 ; GFX10-NEXT:    ; return to shader part epilog
 278 main_body:
 279   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32 1, half %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 280   ret <4 x float> %v
 281 }
 282
 283 define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
     ; gather4_l: half coordinates (%s, %t) followed by a half %lod.
     ; The checks for this variant contain no s_wqm or exec instructions: only the
     ; v0/v1 merge into v1 and an a16 image_gather4_l with address v[1:2].
 284 ; GFX9-LABEL: gather4_l_2d:
 285 ; GFX9:       ; %bb.0: ; %main_body
 286 ; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 287 ; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
 288 ; GFX9-NEXT:    image_gather4_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
 289 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 290 ; GFX9-NEXT:    ; return to shader part epilog
 291 ;
 292 ; GFX10-LABEL: gather4_l_2d:
 293 ; GFX10:       ; %bb.0: ; %main_body
 294 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 295 ; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
 296 ; GFX10-NEXT:    image_gather4_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 297 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 298 ; GFX10-NEXT:    ; return to shader part epilog
 299 main_body:
 300   %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 1, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 301   ret <4 x float> %v
 302 }
 303
 304 define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
     ; gather4_c_l: float %zcompare, half %s/%t, then half %lod.
     ; The checks contain no s_wqm or exec instructions. On the gfx900 run they expect
     ; v_mov_b32 copies to form the contiguous address v[3:5]; the gfx1010/gfx1100 runs
     ; instead expect the bracketed register list [v0, v1, v3] with no copies.
 305 ; GFX9-LABEL: gather4_c_l_2d:
 306 ; GFX9:       ; %bb.0: ; %main_body
 307 ; GFX9-NEXT:    v_mov_b32_e32 v5, v3
 308 ; GFX9-NEXT:    v_mov_b32_e32 v3, v0
 309 ; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
 310 ; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
 311 ; GFX9-NEXT:    image_gather4_c_l v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
 312 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 313 ; GFX9-NEXT:    ; return to shader part epilog
 314 ;
 315 ; GFX10-LABEL: gather4_c_l_2d:
 316 ; GFX10:       ; %bb.0: ; %main_body
 317 ; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 318 ; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
 319 ; GFX10-NEXT:    image_gather4_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 320 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 321 ; GFX10-NEXT:    ; return to shader part epilog
 322 main_body:
 323   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 324   ret <4 x float> %v
 325 }
 326
 327 define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
     ; gather4_lz: only the two half coordinates (%s, %t) are passed.
     ; The checks contain no s_wqm or exec instructions: only the v0/v1 merge into v0
     ; and an a16 image_gather4_lz with the one-register address v0.
 328 ; GFX9-LABEL: gather4_lz_2d:
 329 ; GFX9:       ; %bb.0: ; %main_body
 330 ; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 331 ; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
 332 ; GFX9-NEXT:    image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
 333 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 334 ; GFX9-NEXT:    ; return to shader part epilog
 335 ;
 336 ; GFX10-LABEL: gather4_lz_2d:
 337 ; GFX10:       ; %bb.0: ; %main_body
 338 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 339 ; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
 340 ; GFX10-NEXT:    image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 341 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 342 ; GFX10-NEXT:    ; return to shader part epilog
 343 main_body:
 344   %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 345   ret <4 x float> %v
 346 }
 347
 348 define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
     ; gather4_c_lz: a float %zcompare precedes the half coordinates (%s, %t).
     ; The checks contain no s_wqm or exec instructions: v0 is used as-is, v1/v2 are
     ; merged into v1, and an a16 image_gather4_c_lz takes the address v[0:1].
 349 ; GFX9-LABEL: gather4_c_lz_2d:
 350 ; GFX9:       ; %bb.0: ; %main_body
 351 ; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 352 ; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
 353 ; GFX9-NEXT:    image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
 354 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 355 ; GFX9-NEXT:    ; return to shader part epilog
 356 ;
 357 ; GFX10-LABEL: gather4_c_lz_2d:
 358 ; GFX10:       ; %bb.0: ; %main_body
 359 ; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 360 ; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
 361 ; GFX10-NEXT:    image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 362 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 363 ; GFX10-NEXT:    ; return to shader part epilog
 364 main_body:
 365   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 366   ret <4 x float> %v
 367 }
 368
 369 declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 370 declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 371 declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
     ; The three declarations above are the plain gather4 forms (2d, cube, 2darray).
 372
     ; Variants with a float operand after the leading i32 (.c) and/or an extra
     ; trailing half operand (.cl), all for the 2d dimension.
     ; NOTE(review): every .c.* name here that has no bias operand ends in ".f32"
     ; although the coordinate operands in its signature are half; presumably the
     ; suffix is tolerated through intrinsic name remangling - confirm before
     ; relying on these names elsewhere.
 373 declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 374 declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 375 declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 376
     ; Bias (.b) variants: a half operand directly follows the leading i32, and the
     ; names carry two type suffixes (".f16.f16").
 377 declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 378 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 379 declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 380 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 381
     ; .l variants: one extra trailing half operand (named %lod by the callers in this file).
 382 declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 383 declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 384
     ; .lz variants: same operand lists as the plain 2d and .c 2d forms.
 385 declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 386 declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 387
 388 attributes #0 = { nounwind }
 389 attributes #1 = { nounwind readonly }
 390 attributes #2 = { nounwind readnone }
     ; NOTE(review): in the lines visible here only #1 is referenced (by the intrinsic
     ; declarations); #0 and #2 appear to have no users - confirm before removing them.