llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   ; The llc invocations below pass -mtriple=amdgcn instead of -march=amdgcn:
   ; -march only replaces the architecture part of the host default triple,
   ; which leaves the rest of the triple dependent on the build machine.
   ; The gfx1100 invocation shares the GFX10 prefix, so its output (with
   ; -amdgpu-enable-delay-alu=0) must match the gfx1010 output line for line.
   2 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
   3 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
   4 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
   5
   ; gather4_2d: 2D gather4 whose coordinates %s and %t are half values.
   ; The expected code merges two VGPRs into one with v_perm_b32 and issues
   ; image_gather4 with a single address register and the a16 modifier.
   ; The exec mask is saved, widened with s_wqm and restored before the gather.
   6 define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
   7 ; GFX9-LABEL: gather4_2d:
   8 ; GFX9:       ; %bb.0: ; %main_body
   9 ; GFX9-NEXT:    s_mov_b64 s[12:13], exec
  10 ; GFX9-NEXT:    s_wqm_b64 exec, exec
  11 ; GFX9-NEXT:    s_mov_b32 s14, 0x5040100
  12 ; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s14
  13 ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
  14 ; GFX9-NEXT:    image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
  15 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
  16 ; GFX9-NEXT:    ; return to shader part epilog
  17 ;
  18 ; GFX10-LABEL: gather4_2d:
  19 ; GFX10:       ; %bb.0: ; %main_body
  20 ; GFX10-NEXT:    s_mov_b32 s12, exec_lo
  21 ; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
  22 ; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
  23 ; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
  24 ; GFX10-NEXT:    image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
  25 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
  26 ; GFX10-NEXT:    ; return to shader part epilog
  27 main_body:
  28   %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  29   ret <4 x float> %v
  30 }
  31
  ; gather4_cube: cube gather4 with three half operands (%s, %t, %face).
  ; The expected code packs two VGPRs into v1 with v_perm_b32 and uses the
  ; two-register address v[1:2]. The GFX9 checks expect the da modifier; the
  ; GFX10 checks expect dim:SQ_RSRC_IMG_CUBE instead.
  32 define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
  33 ; GFX9-LABEL: gather4_cube:
  34 ; GFX9:       ; %bb.0: ; %main_body
  35 ; GFX9-NEXT:    s_mov_b64 s[12:13], exec
  36 ; GFX9-NEXT:    s_wqm_b64 exec, exec
  37 ; GFX9-NEXT:    s_mov_b32 s14, 0x5040100
  38 ; GFX9-NEXT:    v_perm_b32 v1, v1, v0, s14
  39 ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
  40 ; GFX9-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
  41 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
  42 ; GFX9-NEXT:    ; return to shader part epilog
  43 ;
  44 ; GFX10-LABEL: gather4_cube:
  45 ; GFX10:       ; %bb.0: ; %main_body
  46 ; GFX10-NEXT:    s_mov_b32 s12, exec_lo
  47 ; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
  48 ; GFX10-NEXT:    v_perm_b32 v1, v1, v0, 0x5040100
  49 ; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
  50 ; GFX10-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE a16
  51 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
  52 ; GFX10-NEXT:    ; return to shader part epilog
  53 main_body:
  54   %v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 1, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  55   ret <4 x float> %v
  56 }
  57
  ; gather4_2darray: 2D-array gather4 with three half operands (%s, %t,
  ; %slice). The expected code packs two VGPRs into v1 with v_perm_b32 and uses
  ; the address v[1:2]. The GFX9 checks expect the da modifier; the GFX10
  ; checks expect dim:SQ_RSRC_IMG_2D_ARRAY instead.
  58 define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
  59 ; GFX9-LABEL: gather4_2darray:
  60 ; GFX9:       ; %bb.0: ; %main_body
  61 ; GFX9-NEXT:    s_mov_b64 s[12:13], exec
  62 ; GFX9-NEXT:    s_wqm_b64 exec, exec
  63 ; GFX9-NEXT:    s_mov_b32 s14, 0x5040100
  64 ; GFX9-NEXT:    v_perm_b32 v1, v1, v0, s14
  65 ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
  66 ; GFX9-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
  67 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
  68 ; GFX9-NEXT:    ; return to shader part epilog
  69 ;
  70 ; GFX10-LABEL: gather4_2darray:
  71 ; GFX10:       ; %bb.0: ; %main_body
  72 ; GFX10-NEXT:    s_mov_b32 s12, exec_lo
  73 ; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
  74 ; GFX10-NEXT:    v_perm_b32 v1, v1, v0, 0x5040100
  75 ; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
  76 ; GFX10-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY a16
  77 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
  78 ; GFX10-NEXT:    ; return to shader part epilog
  79 main_body:
  80   %v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 1, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  81   ret <4 x float> %v
  82 }
  83
  ; gather4_c_2d: 2D gather4 with a float %zcompare operand followed by half
  ; coordinates %s and %t. The expected code packs v2 and v1 into v1 with
  ; v_perm_b32 and issues image_gather4_c on address v[0:1] with a16.
  84 define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
  85 ; GFX9-LABEL: gather4_c_2d:
  86 ; GFX9:       ; %bb.0: ; %main_body
  87 ; GFX9-NEXT:    s_mov_b64 s[12:13], exec
  88 ; GFX9-NEXT:    s_wqm_b64 exec, exec
  89 ; GFX9-NEXT:    s_mov_b32 s14, 0x5040100
  90 ; GFX9-NEXT:    v_perm_b32 v1, v2, v1, s14
  91 ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
  92 ; GFX9-NEXT:    image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
  93 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
  94 ; GFX9-NEXT:    ; return to shader part epilog
  95 ;
  96 ; GFX10-LABEL: gather4_c_2d:
  97 ; GFX10:       ; %bb.0: ; %main_body
  98 ; GFX10-NEXT:    s_mov_b32 s12, exec_lo
  99 ; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
 100 ; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
 101 ; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
 102 ; GFX10-NEXT:    image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 103 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 104 ; GFX10-NEXT:    ; return to shader part epilog
 105 main_body:
 106   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 107   ret <4 x float> %v
 108 }
 109
 ; gather4_cl_2d: 2D gather4 with half coordinates %s, %t and a half %clamp
 ; operand. The expected code packs v1 and v0 into v1 with v_perm_b32 and
 ; issues image_gather4_cl on address v[1:2] with the a16 modifier.
 110 define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
 111 ; GFX9-LABEL: gather4_cl_2d:
 112 ; GFX9:       ; %bb.0: ; %main_body
 113 ; GFX9-NEXT:    s_mov_b64 s[12:13], exec
 114 ; GFX9-NEXT:    s_wqm_b64 exec, exec
 115 ; GFX9-NEXT:    s_mov_b32 s14, 0x5040100
 116 ; GFX9-NEXT:    v_perm_b32 v1, v1, v0, s14
 117 ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
 118 ; GFX9-NEXT:    image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
 119 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 120 ; GFX9-NEXT:    ; return to shader part epilog
 121 ;
 122 ; GFX10-LABEL: gather4_cl_2d:
 123 ; GFX10:       ; %bb.0: ; %main_body
 124 ; GFX10-NEXT:    s_mov_b32 s12, exec_lo
 125 ; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
 126 ; GFX10-NEXT:    v_perm_b32 v1, v1, v0, 0x5040100
 127 ; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
 128 ; GFX10-NEXT:    image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 129 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 130 ; GFX10-NEXT:    ; return to shader part epilog
 131 main_body:
 132   %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 1, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 133   ret <4 x float> %v
 134 }
 135
 ; gather4_c_cl_2d: 2D gather4 with float %zcompare, half coordinates %s, %t
 ; and a half %clamp operand. The two targets differ in address form: the GFX9
 ; checks first copy the operands into the consecutive range v[3:5], while the
 ; GFX10 checks pass the bracketed register list [v0, v1, v3] without copies.
 136 define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
 137 ; GFX9-LABEL: gather4_c_cl_2d:
 138 ; GFX9:       ; %bb.0: ; %main_body
 139 ; GFX9-NEXT:    s_mov_b64 s[12:13], exec
 140 ; GFX9-NEXT:    s_wqm_b64 exec, exec
 141 ; GFX9-NEXT:    s_mov_b32 s14, 0x5040100
 142 ; GFX9-NEXT:    v_mov_b32_e32 v5, v3
 143 ; GFX9-NEXT:    v_mov_b32_e32 v3, v0
 144 ; GFX9-NEXT:    v_perm_b32 v4, v2, v1, s14
 145 ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
 146 ; GFX9-NEXT:    image_gather4_c_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
 147 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 148 ; GFX9-NEXT:    ; return to shader part epilog
 149 ;
 150 ; GFX10-LABEL: gather4_c_cl_2d:
 151 ; GFX10:       ; %bb.0: ; %main_body
 152 ; GFX10-NEXT:    s_mov_b32 s12, exec_lo
 153 ; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
 154 ; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
 155 ; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
 156 ; GFX10-NEXT:    image_gather4_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 157 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 158 ; GFX10-NEXT:    ; return to shader part epilog
 159 main_body:
 160   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 161   ret <4 x float> %v
 162 }
 163
 ; gather4_b_2d: 2D gather4 with a half %bias operand followed by half
 ; coordinates %s and %t. The expected code packs v2 and v1 into v1 with
 ; v_perm_b32 and issues image_gather4_b on address v[0:1] with a16.
 164 define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) {
 165 ; GFX9-LABEL: gather4_b_2d:
 166 ; GFX9:       ; %bb.0: ; %main_body
 167 ; GFX9-NEXT:    s_mov_b64 s[12:13], exec
 168 ; GFX9-NEXT:    s_wqm_b64 exec, exec
 169 ; GFX9-NEXT:    s_mov_b32 s14, 0x5040100
 170 ; GFX9-NEXT:    v_perm_b32 v1, v2, v1, s14
 171 ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
 172 ; GFX9-NEXT:    image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
 173 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 174 ; GFX9-NEXT:    ; return to shader part epilog
 175 ;
 176 ; GFX10-LABEL: gather4_b_2d:
 177 ; GFX10:       ; %bb.0: ; %main_body
 178 ; GFX10-NEXT:    s_mov_b32 s12, exec_lo
 179 ; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
 180 ; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
 181 ; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
 182 ; GFX10-NEXT:    image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 183 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 184 ; GFX10-NEXT:    ; return to shader part epilog
 185 main_body:
 186   %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32 1, half %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 187   ret <4 x float> %v
 188 }
 189
 ; gather4_c_b_2d: 2D gather4 with half %bias, float %zcompare and half
 ; coordinates %s, %t. The expected code packs v3 and v2 into v2 with
 ; v_perm_b32 and issues image_gather4_c_b on address v[0:2] with a16.
 190 define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) {
 191 ; GFX9-LABEL: gather4_c_b_2d:
 192 ; GFX9:       ; %bb.0: ; %main_body
 193 ; GFX9-NEXT:    s_mov_b64 s[12:13], exec
 194 ; GFX9-NEXT:    s_wqm_b64 exec, exec
 195 ; GFX9-NEXT:    s_mov_b32 s14, 0x5040100
 196 ; GFX9-NEXT:    v_perm_b32 v2, v3, v2, s14
 197 ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
 198 ; GFX9-NEXT:    image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16
 199 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 200 ; GFX9-NEXT:    ; return to shader part epilog
 201 ;
 202 ; GFX10-LABEL: gather4_c_b_2d:
 203 ; GFX10:       ; %bb.0: ; %main_body
 204 ; GFX10-NEXT:    s_mov_b32 s12, exec_lo
 205 ; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
 206 ; GFX10-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
 207 ; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
 208 ; GFX10-NEXT:    image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 209 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 210 ; GFX10-NEXT:    ; return to shader part epilog
 211 main_body:
 212   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32 1, half %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 213   ret <4 x float> %v
 214 }
 215
 ; gather4_b_cl_2d: 2D gather4 with half %bias, half coordinates %s, %t and a
 ; half %clamp operand. The two targets differ in address form: the GFX9
 ; checks first copy the operands into the consecutive range v[3:5], while the
 ; GFX10 checks pass the bracketed register list [v0, v1, v3] without copies.
 216 define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) {
 217 ; GFX9-LABEL: gather4_b_cl_2d:
 218 ; GFX9:       ; %bb.0: ; %main_body
 219 ; GFX9-NEXT:    s_mov_b64 s[12:13], exec
 220 ; GFX9-NEXT:    s_wqm_b64 exec, exec
 221 ; GFX9-NEXT:    s_mov_b32 s14, 0x5040100
 222 ; GFX9-NEXT:    v_mov_b32_e32 v5, v3
 223 ; GFX9-NEXT:    v_mov_b32_e32 v3, v0
 224 ; GFX9-NEXT:    v_perm_b32 v4, v2, v1, s14
 225 ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
 226 ; GFX9-NEXT:    image_gather4_b_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
 227 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 228 ; GFX9-NEXT:    ; return to shader part epilog
 229 ;
 230 ; GFX10-LABEL: gather4_b_cl_2d:
 231 ; GFX10:       ; %bb.0: ; %main_body
 232 ; GFX10-NEXT:    s_mov_b32 s12, exec_lo
 233 ; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
 234 ; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
 235 ; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
 236 ; GFX10-NEXT:    image_gather4_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 237 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 238 ; GFX10-NEXT:    ; return to shader part epilog
 239 main_body:
 240   %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32 1, half %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 241   ret <4 x float> %v
 242 }
 243
 ; gather4_c_b_cl_2d: 2D gather4 with half %bias, float %zcompare, half
 ; coordinates %s, %t and a half %clamp operand. The GFX9 checks copy the
 ; operands into the consecutive range v[4:7] before the gather; the GFX10
 ; checks pass the bracketed register list [v0, v1, v2, v4] without copies.
 244 define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) {
 245 ; GFX9-LABEL: gather4_c_b_cl_2d:
 246 ; GFX9:       ; %bb.0: ; %main_body
 247 ; GFX9-NEXT:    s_mov_b64 s[12:13], exec
 248 ; GFX9-NEXT:    s_wqm_b64 exec, exec
 249 ; GFX9-NEXT:    s_mov_b32 s14, 0x5040100
 250 ; GFX9-NEXT:    v_mov_b32_e32 v7, v4
 251 ; GFX9-NEXT:    v_mov_b32_e32 v5, v1
 252 ; GFX9-NEXT:    v_mov_b32_e32 v4, v0
 253 ; GFX9-NEXT:    v_perm_b32 v6, v3, v2, s14
 254 ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
 255 ; GFX9-NEXT:    image_gather4_c_b_cl v[0:3], v[4:7], s[0:7], s[8:11] dmask:0x1 a16
 256 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 257 ; GFX9-NEXT:    ; return to shader part epilog
 258 ;
 259 ; GFX10-LABEL: gather4_c_b_cl_2d:
 260 ; GFX10:       ; %bb.0: ; %main_body
 261 ; GFX10-NEXT:    s_mov_b32 s12, exec_lo
 262 ; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
 263 ; GFX10-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
 264 ; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
 265 ; GFX10-NEXT:    image_gather4_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 266 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 267 ; GFX10-NEXT:    ; return to shader part epilog
 268 main_body:
 269   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32 1, half %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 270   ret <4 x float> %v
 271 }
 272
 ; gather4_l_2d: 2D gather4 with half coordinates %s, %t and a half %lod
 ; operand. The expected code packs v1 and v0 into v1 with v_perm_b32 and
 ; issues image_gather4_l on address v[1:2] with a16. The expected output
 ; contains no s_wqm or exec save/restore instructions.
 273 define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
 274 ; GFX9-LABEL: gather4_l_2d:
 275 ; GFX9:       ; %bb.0: ; %main_body
 276 ; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
 277 ; GFX9-NEXT:    v_perm_b32 v1, v1, v0, s12
 278 ; GFX9-NEXT:    image_gather4_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
 279 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 280 ; GFX9-NEXT:    ; return to shader part epilog
 281 ;
 282 ; GFX10-LABEL: gather4_l_2d:
 283 ; GFX10:       ; %bb.0: ; %main_body
 284 ; GFX10-NEXT:    v_perm_b32 v1, v1, v0, 0x5040100
 285 ; GFX10-NEXT:    image_gather4_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 286 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 287 ; GFX10-NEXT:    ; return to shader part epilog
 288 main_body:
 289   %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 1, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 290   ret <4 x float> %v
 291 }
 292
 ; gather4_c_l_2d: 2D gather4 with float %zcompare, half coordinates %s, %t
 ; and a half %lod operand. The GFX9 checks copy the operands into the
 ; consecutive range v[3:5]; the GFX10 checks pass the bracketed register list
 ; [v0, v1, v3]. Neither expected output contains s_wqm or exec save/restore.
 293 define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
 294 ; GFX9-LABEL: gather4_c_l_2d:
 295 ; GFX9:       ; %bb.0: ; %main_body
 296 ; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
 297 ; GFX9-NEXT:    v_mov_b32_e32 v5, v3
 298 ; GFX9-NEXT:    v_mov_b32_e32 v3, v0
 299 ; GFX9-NEXT:    v_perm_b32 v4, v2, v1, s12
 300 ; GFX9-NEXT:    image_gather4_c_l v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
 301 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 302 ; GFX9-NEXT:    ; return to shader part epilog
 303 ;
 304 ; GFX10-LABEL: gather4_c_l_2d:
 305 ; GFX10:       ; %bb.0: ; %main_body
 306 ; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
 307 ; GFX10-NEXT:    image_gather4_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 308 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 309 ; GFX10-NEXT:    ; return to shader part epilog
 310 main_body:
 311   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 312   ret <4 x float> %v
 313 }
 314
 ; gather4_lz_2d: 2D gather4 (lz variant) with half coordinates %s and %t.
 ; The expected code packs v1 and v0 into v0 with v_perm_b32 and issues
 ; image_gather4_lz with the single address register v0 and a16. The expected
 ; output contains no s_wqm or exec save/restore instructions.
 315 define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
 316 ; GFX9-LABEL: gather4_lz_2d:
 317 ; GFX9:       ; %bb.0: ; %main_body
 318 ; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
 319 ; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s12
 320 ; GFX9-NEXT:    image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
 321 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 322 ; GFX9-NEXT:    ; return to shader part epilog
 323 ;
 324 ; GFX10-LABEL: gather4_lz_2d:
 325 ; GFX10:       ; %bb.0: ; %main_body
 326 ; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
 327 ; GFX10-NEXT:    image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 328 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 329 ; GFX10-NEXT:    ; return to shader part epilog
 330 main_body:
 331   %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 332   ret <4 x float> %v
 333 }
 334
 ; gather4_c_lz_2d: 2D gather4 (c.lz variant) with float %zcompare followed by
 ; half coordinates %s and %t. The expected code packs v2 and v1 into v1 with
 ; v_perm_b32 and issues image_gather4_c_lz on address v[0:1] with a16. The
 ; expected output contains no s_wqm or exec save/restore instructions.
 335 define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
 336 ; GFX9-LABEL: gather4_c_lz_2d:
 337 ; GFX9:       ; %bb.0: ; %main_body
 338 ; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
 339 ; GFX9-NEXT:    v_perm_b32 v1, v2, v1, s12
 340 ; GFX9-NEXT:    image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
 341 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 342 ; GFX9-NEXT:    ; return to shader part epilog
 343 ;
 344 ; GFX10-LABEL: gather4_c_lz_2d:
 345 ; GFX10:       ; %bb.0: ; %main_body
 346 ; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
 347 ; GFX10-NEXT:    image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
 348 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 349 ; GFX10-NEXT:    ; return to shader part epilog
 350 main_body:
 351   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
 352   ret <4 x float> %v
 353 }
 354
 ; Declarations of every gather4 intrinsic called by the functions above.
 ; Each returns <4 x float>, takes half coordinate operands, and is tagged
 ; with attribute group #1.
 ; NOTE(review): the depth-compare declarations (gather4.c.2d, c.cl.2d, c.l.2d,
 ; c.lz.2d) carry an .f32 name suffix although their coordinate operands are
 ; half; presumably the IR reader remangles these names on load -- confirm.
 355 declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 356 declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 357 declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 358
 359 declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 360 declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 361 declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 362
 363 declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 364 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 365 declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 366 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 367
 368 declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 369 declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 370
 371 declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 372 declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 373
 ; Attribute groups. Only #1 (nounwind readonly) is referenced by the
 ; declarations visible in this file; no visible line references #0 or #2.
 374 attributes #0 = { nounwind }
 375 attributes #1 = { nounwind readonly }
 376 attributes #2 = { nounwind readnone }