; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789,SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789,GFX8910,SIVI,PRT %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789,PRT %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-prt-strict-null -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900,NOPRT %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX10 %s
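
; A note on the intrinsic operands used throughout this test (a summary of the
; convention the checks below rely on, not an authoritative reference): dmask
; comes first, then the coordinates, the <8 x i32> resource descriptor, a
; texfailctrl word (bit 0 = tfe, bit 1 = lwe) and a cachepolicy word
; (bit 0 = glc, bit 1 = slc).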
; GCN-LABEL: {{^}}load_1d:
; GFX6789: image_load v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ;
define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}
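
; In the *_tfe/*_lwe tests below a non-zero texfailctrl requests an extra
; status dword, so the destination grows to v[0:4] and the status lands in v4,
; which is then stored to %out. With the default prt-strict-null behaviour
; (PRT) all result registers are zero-initialized first; with
; -enable-prt-strict-null disabled (NOPRT) only the status register is zeroed.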
; GCN-LABEL: {{^}}load_1d_tfe:
; PRT: v_mov_b32_e32 v0, 0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; NOPRT: v_mov_b32_e32 v4, 0
; NOPRT-NOT: v_mov_b32_e32 v0
; NOPRT-NOT: v_mov_b32_e32 v1
; NOPRT-NOT: v_mov_b32_e32 v2
; NOPRT-NOT: v_mov_b32_e32 v3
; GFX6789: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf unorm tfe{{$}}
; GFX10: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm tfe ;
; SIVI: buffer_store_dword v4, off, s[8:11], 0
; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; GCN-LABEL: {{^}}load_1d_lwe:
; PRT: v_mov_b32_e32 v0, 0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; NOPRT: v_mov_b32_e32 v4, 0
; NOPRT-NOT: v_mov_b32_e32 v0
; NOPRT-NOT: v_mov_b32_e32 v1
; NOPRT-NOT: v_mov_b32_e32 v2
; NOPRT-NOT: v_mov_b32_e32 v3
; GFX6789: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf unorm lwe{{$}}
; GFX10: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe ;
; SIVI: buffer_store_dword v4, off, s[8:11], 0
; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
define amdgpu_ps <4 x float> @load_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
main_body:
  %v = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 2, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}
; GCN-LABEL: {{^}}load_2d:
; GFX6789: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
; GFX10: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ;
define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_2d_tfe:
; PRT: v_mov_b32_e32 v0, 0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; NOPRT: v_mov_b32_e32 v4, 0
; NOPRT-NOT: v_mov_b32_e32 v0
; NOPRT-NOT: v_mov_b32_e32 v1
; NOPRT-NOT: v_mov_b32_e32 v2
; NOPRT-NOT: v_mov_b32_e32 v3
; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe{{$}}
; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ;
; SIVI: buffer_store_dword v4, off, s[8:11], 0
; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; GCN-LABEL: {{^}}load_3d:
; GFX6789: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
; GFX10: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ;
define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_3d_tfe_lwe:
; PRT: v_mov_b32_e32 v0, 0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; NOPRT: v_mov_b32_e32 v4, 0
; NOPRT-NOT: v_mov_b32_e32 v0
; NOPRT-NOT: v_mov_b32_e32 v1
; NOPRT-NOT: v_mov_b32_e32 v2
; NOPRT-NOT: v_mov_b32_e32 v3
; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe lwe{{$}}
; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe lwe ;
; SIVI: buffer_store_dword v4, off, s[8:11], 0
; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
define amdgpu_ps <4 x float> @load_3d_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %r) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 3, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}
; GCN-LABEL: {{^}}load_cube:
; GFX6789: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
; GFX10: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ;
define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_cube_lwe:
; PRT: v_mov_b32_e32 v0, 0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; NOPRT: v_mov_b32_e32 v4, 0
; NOPRT-NOT: v_mov_b32_e32 v0
; NOPRT-NOT: v_mov_b32_e32 v1
; NOPRT-NOT: v_mov_b32_e32 v2
; NOPRT-NOT: v_mov_b32_e32 v3
; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe da{{$}}
; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm lwe ;
; SIVI: buffer_store_dword v4, off, s[8:11], 0
; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
define amdgpu_ps <4 x float> @load_cube_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; GCN-LABEL: {{^}}load_1darray:
; GFX6789: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da{{$}}
; GFX10: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ;
define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_1darray_tfe:
; PRT: v_mov_b32_e32 v0, 0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; NOPRT: v_mov_b32_e32 v4, 0
; NOPRT-NOT: v_mov_b32_e32 v0
; NOPRT-NOT: v_mov_b32_e32 v1
; NOPRT-NOT: v_mov_b32_e32 v2
; NOPRT-NOT: v_mov_b32_e32 v3
; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe da{{$}}
; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm tfe ;
; SIVI: buffer_store_dword v4, off, s[8:11], 0
; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
define amdgpu_ps <4 x float> @load_1darray_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %slice) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}
; GCN-LABEL: {{^}}load_2darray:
; GFX6789: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
; GFX10: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ;
define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_2darray_lwe:
; PRT: v_mov_b32_e32 v0, 0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; NOPRT: v_mov_b32_e32 v4, 0
; NOPRT-NOT: v_mov_b32_e32 v0
; NOPRT-NOT: v_mov_b32_e32 v1
; NOPRT-NOT: v_mov_b32_e32 v2
; NOPRT-NOT: v_mov_b32_e32 v3
; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe da{{$}}
; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm lwe ;
; SIVI: buffer_store_dword v4, off, s[8:11], 0
; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
define amdgpu_ps <4 x float> @load_2darray_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; GCN-LABEL: {{^}}load_2dmsaa:
; GFX6789: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
; GFX10: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ;
define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_2dmsaa_both:
; PRT: v_mov_b32_e32 v0, 0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; NOPRT: v_mov_b32_e32 v4, 0
; NOPRT-NOT: v_mov_b32_e32 v0
; NOPRT-NOT: v_mov_b32_e32 v1
; NOPRT-NOT: v_mov_b32_e32 v2
; NOPRT-NOT: v_mov_b32_e32 v3
; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe lwe{{$}}
; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe ;
; SIVI: buffer_store_dword v4, off, s[8:11], 0
; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}
; GCN-LABEL: {{^}}load_2darraymsaa:
; GFX6789: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
; GFX10: image_load v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ;
define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_2darraymsaa_tfe:
; PRT: v_mov_b32_e32 v0, 0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; NOPRT: v_mov_b32_e32 v4, 0
; NOPRT-NOT: v_mov_b32_e32 v0
; NOPRT-NOT: v_mov_b32_e32 v1
; NOPRT-NOT: v_mov_b32_e32 v2
; NOPRT-NOT: v_mov_b32_e32 v3
; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe da{{$}}
; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ;
; SIVI: buffer_store_dword v4, off, s[8:11], 0
; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; GCN-LABEL: {{^}}load_mip_1d:
; GFX6789: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
; GFX10: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ;
define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_mip_1d_lwe:
; PRT: v_mov_b32_e32 v0, 0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; NOPRT: v_mov_b32_e32 v4, 0
; NOPRT-NOT: v_mov_b32_e32 v0
; NOPRT-NOT: v_mov_b32_e32 v1
; NOPRT-NOT: v_mov_b32_e32 v2
; NOPRT-NOT: v_mov_b32_e32 v3
; GFX6789: image_load_mip v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe{{$}}
; GFX10: image_load_mip v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe ;
; SIVI: buffer_store_dword v4, off, s[8:11], 0
; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
define amdgpu_ps <4 x float> @load_mip_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %mip) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.1d.v4f32i32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 2, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}
; GCN-LABEL: {{^}}load_mip_2d:
; GFX6789: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
; GFX10: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ;
define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_mip_2d_tfe:
; PRT: v_mov_b32_e32 v0, 0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; NOPRT: v_mov_b32_e32 v4, 0
; NOPRT-NOT: v_mov_b32_e32 v0
; NOPRT-NOT: v_mov_b32_e32 v1
; NOPRT-NOT: v_mov_b32_e32 v2
; NOPRT-NOT: v_mov_b32_e32 v3
; GFX6789: image_load_mip v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe{{$}}
; GFX10: image_load_mip v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ;
; SIVI: buffer_store_dword v4, off, s[8:11], 0
; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
define amdgpu_ps <4 x float> @load_mip_2d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %mip) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}
; Make sure that the error flag is returned even with dmask 0
; GCN-LABEL: {{^}}load_1d_V2_tfe_dmask0:
; GCN: v_mov_b32_e32 v1, 0
; PRT-DAG: v_mov_b32_e32 v2, v1
; PRT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe{{$}}
; NOPRT-NOT: v_mov_b32_e32 v1
; NOPRT: image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe{{$}}
define amdgpu_ps float @load_1d_V2_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32 0, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue {<2 x float>, i32} %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; GCN-LABEL: {{^}}load_1d_V1_tfe_dmask0:
; GCN: v_mov_b32_e32 v1, 0
; PRT-DAG: v_mov_b32_e32 v2, v1
; PRT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe{{$}}
; NOPRT-NOT: v_mov_b32_e32 v1
; NOPRT: image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe{{$}}
define amdgpu_ps float @load_1d_V1_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32 0, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue {float, i32} %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; GCN-LABEL: {{^}}load_mip_2d_tfe_dmask0:
; GCN: v_mov_b32_e32 v3, 0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v3
; PRT: image_load_mip v[3:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}}
; NOPRT-NOT: v_mov_b32_e32 v2
; NOPRT: image_load_mip v[2:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}}
define amdgpu_ps float @load_mip_2d_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 0, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}
; Do not make dmask 0 even if no result (other than tfe) is used.
; GCN-LABEL: {{^}}load_mip_2d_tfe_nouse:
; GCN: v_mov_b32_e32 v3, 0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v3
; PRT: image_load_mip v[3:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}}
; NOPRT-NOT: v_mov_b32_e32 v2
; NOPRT: image_load_mip v[2:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}}
define amdgpu_ps float @load_mip_2d_tfe_nouse(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; GCN-LABEL: {{^}}load_mip_2d_tfe_nouse_V2:
; GCN: v_mov_b32_e32 v3, 0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v3
; PRT: image_load_mip v[3:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}}
; NOPRT-NOT: v_mov_b32_e32 v2
; NOPRT: image_load_mip v[2:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}}
define amdgpu_ps float @load_mip_2d_tfe_nouse_V2(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
main_body:
  %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v2f32i32.i32(i32 6, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue {<2 x float>, i32} %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}

; GCN-LABEL: {{^}}load_mip_2d_tfe_nouse_V1:
; GCN: v_mov_b32_e32 v3, 0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v3
; PRT: image_load_mip v[3:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x2 unorm tfe{{$}}
; NOPRT-NOT: v_mov_b32_e32 v2
; NOPRT: image_load_mip v[2:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x2 unorm tfe{{$}}
define amdgpu_ps float @load_mip_2d_tfe_nouse_V1(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
main_body:
  %v = call {float, i32} @llvm.amdgcn.image.load.mip.2d.f32i32.i32(i32 2, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
  %v.err = extractvalue {float, i32} %v, 1
  %vv = bitcast i32 %v.err to float
  ret float %vv
}
; Check for dmask being materially smaller than the return type
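; With tfe the destination needs one register per set dmask bit plus one for
; the status dword, so dmask 0x7 uses v[0:3], dmask 0x6 uses v[0:2], and a
; single-bit dmask uses v[0:1].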
; GCN-LABEL: {{^}}load_1d_tfe_V4_dmask3:
; PRT: v_mov_b32_e32 v0, 0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; NOPRT: v_mov_b32_e32 v3, 0
; NOPRT-NOT: v_mov_b32_e32 v0
; NOPRT-NOT: v_mov_b32_e32 v1
; NOPRT-NOT: v_mov_b32_e32 v2
; GFX6789: image_load v[0:3], v{{[0-9]+}}, s[0:7] dmask:0x7 unorm tfe{{$}}
; GFX10: image_load v[0:3], v{{[0-9]+}}, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe ;
; SIVI: buffer_store_dword v3, off, s[8:11], 0
; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v3
define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask3(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; GCN-LABEL: {{^}}load_1d_tfe_V4_dmask2:
; PRT: v_mov_b32_e32 v0, 0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; NOPRT: v_mov_b32_e32 v2, 0
; NOPRT-NOT: v_mov_b32_e32 v0
; NOPRT-NOT: v_mov_b32_e32 v1
; GFX6789: image_load v[0:2], v{{[0-9]+}}, s[0:7] dmask:0x6 unorm tfe{{$}}
; GFX10: image_load v[0:2], v{{[0-9]+}}, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm tfe ;
; SIVI: buffer_store_dword v2, off, s[8:11], 0
; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v2
define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask2(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; GCN-LABEL: {{^}}load_1d_tfe_V4_dmask1:
; PRT: v_mov_b32_e32 v0, 0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; NOPRT: v_mov_b32_e32 v1, 0
; NOPRT-NOT: v_mov_b32_e32 v0
; GFX6789: image_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 unorm tfe{{$}}
; GFX10: image_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe ;
; SIVI: buffer_store_dword v1, off, s[8:11], 0
; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v1
define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; GCN-LABEL: {{^}}load_1d_tfe_V2_dmask1:
; PRT: v_mov_b32_e32 v0, 0
; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
; NOPRT: v_mov_b32_e32 v1, 0
; NOPRT-NOT: v_mov_b32_e32 v0
; GFX6789: image_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 unorm tfe{{$}}
; GFX10: image_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe ;
; SIVI: buffer_store_dword v1, off, s[8:11], 0
; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v1
define amdgpu_ps <2 x float> @load_1d_tfe_V2_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
main_body:
  %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<2 x float>, i32} %v, 0
  %v.err = extractvalue {<2 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <2 x float> %v.vec
}
; GCN-LABEL: {{^}}load_mip_3d:
; GFX6789: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
; GFX10: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ;
define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_mip_cube:
; GFX6789: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
; GFX10: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ;
define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_mip_1darray:
; GFX6789: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
; GFX10: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ;
define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_mip_2darray:
; GFX6789: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
; GFX10: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ;
define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}
; GCN-LABEL: {{^}}store_1d:
; GFX6789: image_store v[0:3], v4, s[0:7] dmask:0xf unorm{{$}}
; GFX10: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ;
define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_2d:
; GFX6789: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
; GFX10: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ;
define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
main_body:
  call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_3d:
; GFX6789: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
; GFX10: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ;
define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r) {
main_body:
  call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_cube:
; GFX6789: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
; GFX10: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ;
define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
main_body:
  call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_1darray:
; GFX6789: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da{{$}}
; GFX10: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ;
define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice) {
main_body:
  call void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_2darray:
; GFX6789: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
; GFX10: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ;
define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
main_body:
  call void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_2dmsaa:
; GFX6789: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
; GFX10: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ;
define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %fragid) {
main_body:
  call void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_2darraymsaa:
; GFX6789: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
; GFX10: image_store v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ;
define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
main_body:
  call void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
; GCN-LABEL: {{^}}store_mip_1d:
; GFX6789: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
; GFX10: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ;
define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %mip) {
main_body:
  call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_mip_2d:
; GFX6789: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
; GFX10: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ;
define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %mip) {
main_body:
  call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_mip_3d:
; GFX6789: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
; GFX10: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ;
define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r, i32 %mip) {
main_body:
  call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_mip_cube:
; GFX6789: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
; GFX10: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ;
define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) {
main_body:
  call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_mip_1darray:
; GFX6789: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
; GFX10: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ;
define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice, i32 %mip) {
main_body:
  call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_mip_2darray:
; GFX6789: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
; GFX10: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ;
define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) {
main_body:
  call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
; GCN-LABEL: {{^}}getresinfo_1d:
; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ;
define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}getresinfo_2d:
; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ;
define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}getresinfo_3d:
; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ;
define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}getresinfo_cube:
; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ;
define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}getresinfo_1darray:
; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ;
define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}getresinfo_2darray:
; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ;
define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}getresinfo_2dmsaa:
; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ;
define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}getresinfo_2darraymsaa:
; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ;
define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}
; GCN-LABEL: {{^}}load_1d_V1:
; GFX6789: image_load v0, v0, s[0:7] dmask:0x8 unorm{{$}}
; GFX10: image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm ;
define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %v
}

; GCN-LABEL: {{^}}load_1d_V2:
; GFX6789: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm{{$}}
; GFX10: image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm ;
define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %v
}

; GCN-LABEL: {{^}}store_1d_V1:
; GFX6789: image_store v0, v1, s[0:7] dmask:0x2 unorm{{$}}
; GFX10: image_store v0, v1, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm ;
define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.f32.i32(float %vdata, i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_1d_V2:
; GFX6789: image_store v[0:1], v2, s[0:7] dmask:0xc unorm{{$}}
; GFX10: image_store v[0:1], v2, s[0:7] dmask:0xc dim:SQ_RSRC_IMG_1D unorm ;
define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float> %vdata, i32 12, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
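
; The cachepolicy operand maps straight onto the cache bits in the following
; tests: 1 -> glc, 2 -> slc, 3 -> glc slc.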
; GCN-LABEL: {{^}}load_1d_glc:
; GFX6789: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc{{$}}
; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc ;
define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_1d_slc:
; GFX6789: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc{{$}}
; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc ;
define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_1d_glc_slc:
; GFX6789: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc{{$}}
; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc ;
define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}store_1d_glc:
; GFX6789: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc{{$}}
; GFX10: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc ;
define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
  ret void
}

; GCN-LABEL: {{^}}store_1d_slc:
; GFX6789: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc{{$}}
; GFX10: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc ;
define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ret void
}

; GCN-LABEL: {{^}}store_1d_glc_slc:
; GFX6789: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc{{$}}
; GFX10: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc ;
define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
  ret void
}
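
; With dmask 0 and no tfe there are no results to define, so the getresinfo is
; expected to be folded away entirely; only the shader epilog return is checked.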
; GCN-LABEL: {{^}}getresinfo_dmask0:
; GCN: ; return to shader part epilog
define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %mip) #0 {
main_body:
  %r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 0, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %r
}
; Ideally, the register allocator would avoid the wait here
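; (the waits are needed because the same VGPRs, v[0:3], hold the first store's
; data, the load's result, and the second store's data).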
; GCN-LABEL: {{^}}image_store_wait:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf
; SI: s_waitcnt expcnt(0)
; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf
; GCN: s_waitcnt vmcnt(0)
; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf
define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %arg3, i32 15, i32 %arg4, <8 x i32> %arg, i32 0, i32 0)
  %data = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %arg4, <8 x i32> %arg1, i32 0, i32 0)
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %data, i32 15, i32 %arg4, <8 x i32> %arg2, i32 0, i32 0)
  ret void
}
; SI won't merge ds memory operations, because of the signed offset bug, so
; we only have check lines for VI+.
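; The point here (as the name suggests) is the machine memory operand on the
; image load: it must not be seen as aliasing the LDS accesses, or the two LDS
; stores around it could not be merged into the ds_write2_b32 checked below.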
; GFX8910-LABEL: image_load_mmo
; GFX8910: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; GFX8910: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 {
  store float 0.000000e+00, float addrspace(3)* %lds
  %c0 = extractelement <2 x i32> %c, i32 0
  %c1 = extractelement <2 x i32> %c, i32 1
  %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 %c0, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
  %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
  store float 0.000000e+00, float addrspace(3)* %tmp2
  ret float %tex
}
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1

declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.mip.1d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<2 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v2f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {float,i32} @llvm.amdgcn.image.load.mip.2d.f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1

declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0

declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0

declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2

declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) #0

attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }