test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll

   1 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,SIVI,PRT %s
   2 ; RUN: llc -march=amdgcn -mcpu=fiji  -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,SIVI,PRT %s
   3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900,PRT %s
   4 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-prt-strict-null -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900,NOPRT %s
   5
   6 ; GCN-LABEL: {{^}}load_1d:
   7 ; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
   8 define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, i32 %s) {
   9 main_body:
  10   %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  11   ret <4 x float> %v
  12 }
  13
  14 ; GCN-LABEL: {{^}}load_1d_tfe:
  15 ; PRT: v_mov_b32_e32 v0, 0
  16 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
  17 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
  18 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
  19 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
  20 ; NOPRT: v_mov_b32_e32 v4, 0
  21 ; NOPRT-NOT: v_mov_b32_e32 v0
  22 ; NOPRT-NOT: v_mov_b32_e32 v1
  23 ; NOPRT-NOT: v_mov_b32_e32 v2
  24 ; NOPRT-NOT: v_mov_b32_e32 v3
  25 ; GCN: image_load v[0:7], v{{[0-9]+}}, s[0:7] dmask:0xf unorm tfe{{$}}
  26 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
  27 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
  28 define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
  29 main_body:
  30   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  31   %v.vec = extractvalue {<4 x float>, i32} %v, 0
  32   %v.err = extractvalue {<4 x float>, i32} %v, 1
  33   store i32 %v.err, i32 addrspace(1)* %out, align 4
  34   ret <4 x float> %v.vec
  35 }
  36
  37 ; GCN-LABEL: {{^}}load_1d_lwe:
  38 ; PRT: v_mov_b32_e32 v0, 0
  39 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
  40 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
  41 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
  42 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
  43 ; NOPRT: v_mov_b32_e32 v4, 0
  44 ; NOPRT-NOT: v_mov_b32_e32 v0
  45 ; NOPRT-NOT: v_mov_b32_e32 v1
  46 ; NOPRT-NOT: v_mov_b32_e32 v2
  47 ; NOPRT-NOT: v_mov_b32_e32 v3
  48 ; GCN: image_load v[0:7], v{{[0-9]+}}, s[0:7] dmask:0xf unorm lwe{{$}}
  49 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
  50 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
  51 define amdgpu_ps <4 x float> @load_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
  52 main_body:
  53   %v = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 2, i32 0)
  54   %v.vec = extractvalue {<4 x float>, i32} %v, 0
  55   %v.err = extractvalue {<4 x float>, i32} %v, 1
  56   store i32 %v.err, i32 addrspace(1)* %out, align 4
  57   ret <4 x float> %v.vec
  58 }
  59
  60 ; GCN-LABEL: {{^}}load_2d:
  61 ; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
  62 define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  63 main_body:
  64   %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  65   ret <4 x float> %v
  66 }
  67
  68 ; GCN-LABEL: {{^}}load_2d_tfe:
  69 ; PRT: v_mov_b32_e32 v0, 0
  70 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
  71 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
  72 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
  73 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
  74 ; NOPRT: v_mov_b32_e32 v4, 0
  75 ; NOPRT-NOT: v_mov_b32_e32 v0
  76 ; NOPRT-NOT: v_mov_b32_e32 v1
  77 ; NOPRT-NOT: v_mov_b32_e32 v2
  78 ; NOPRT-NOT: v_mov_b32_e32 v3
  79 ; GCN: image_load v[0:7], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe{{$}}
  80 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
  81 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
  82 define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t) {
  83 main_body:
  84   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  85   %v.vec = extractvalue {<4 x float>, i32} %v, 0
  86   %v.err = extractvalue {<4 x float>, i32} %v, 1
  87   store i32 %v.err, i32 addrspace(1)* %out, align 4
  88   ret <4 x float> %v.vec
  89 }
  90
  91 ; GCN-LABEL: {{^}}load_3d:
  92 ; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
  93 define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
  94 main_body:
  95   %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
  96   ret <4 x float> %v
  97 }
  98
  99 ; GCN-LABEL: {{^}}load_3d_tfe_lwe:
 100 ; PRT: v_mov_b32_e32 v0, 0
 101 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 102 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 103 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 104 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 105 ; NOPRT: v_mov_b32_e32 v4, 0
 106 ; NOPRT-NOT: v_mov_b32_e32 v0
 107 ; NOPRT-NOT: v_mov_b32_e32 v1
 108 ; NOPRT-NOT: v_mov_b32_e32 v2
 109 ; NOPRT-NOT: v_mov_b32_e32 v3
 110 ; GCN: image_load v[0:7], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe lwe{{$}}
 111 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
 112 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
 113 define amdgpu_ps <4 x float> @load_3d_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %r) {
 114 main_body:
 115   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 3, i32 0)
 116   %v.vec = extractvalue {<4 x float>, i32} %v, 0
 117   %v.err = extractvalue {<4 x float>, i32} %v, 1
 118   store i32 %v.err, i32 addrspace(1)* %out, align 4
 119   ret <4 x float> %v.vec
 120 }
 121
 122 ; GCN-LABEL: {{^}}load_cube:
 123 ; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
 124 define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
 125 main_body:
 126   %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
 127   ret <4 x float> %v
 128 }
 129
 130 ; GCN-LABEL: {{^}}load_cube_lwe:
 131 ; PRT: v_mov_b32_e32 v0, 0
 132 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 133 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 134 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 135 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 136 ; NOPRT: v_mov_b32_e32 v4, 0
 137 ; NOPRT-NOT: v_mov_b32_e32 v0
 138 ; NOPRT-NOT: v_mov_b32_e32 v1
 139 ; NOPRT-NOT: v_mov_b32_e32 v2
 140 ; NOPRT-NOT: v_mov_b32_e32 v3
 141 ; GCN: image_load v[0:7], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe da{{$}}
 142 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
 143 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
 144 define amdgpu_ps <4 x float> @load_cube_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
 145 main_body:
 146   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
 147   %v.vec = extractvalue {<4 x float>, i32} %v, 0
 148   %v.err = extractvalue {<4 x float>, i32} %v, 1
 149   store i32 %v.err, i32 addrspace(1)* %out, align 4
 150   ret <4 x float> %v.vec
 151 }
 152
 153 ; GCN-LABEL: {{^}}load_1darray:
 154 ; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da{{$}}
 155 define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice) {
 156 main_body:
 157   %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
 158   ret <4 x float> %v
 159 }
 160
 161 ; GCN-LABEL: {{^}}load_1darray_tfe:
 162 ; PRT: v_mov_b32_e32 v0, 0
 163 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 164 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 165 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 166 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 167 ; NOPRT: v_mov_b32_e32 v4, 0
 168 ; NOPRT-NOT: v_mov_b32_e32 v0
 169 ; NOPRT-NOT: v_mov_b32_e32 v1
 170 ; NOPRT-NOT: v_mov_b32_e32 v2
 171 ; NOPRT-NOT: v_mov_b32_e32 v3
 172 ; GCN: image_load v[0:7], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe da{{$}}
 173 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
 174 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
 175 define amdgpu_ps <4 x float> @load_1darray_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %slice) {
 176 main_body:
 177   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 1, i32 0)
 178   %v.vec = extractvalue {<4 x float>, i32} %v, 0
 179   %v.err = extractvalue {<4 x float>, i32} %v, 1
 180   store i32 %v.err, i32 addrspace(1)* %out, align 4
 181   ret <4 x float> %v.vec
 182 }
 183
 184 ; GCN-LABEL: {{^}}load_2darray:
 185 ; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
 186 define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
 187 main_body:
 188   %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
 189   ret <4 x float> %v
 190 }
 191
 192 ; GCN-LABEL: {{^}}load_2darray_lwe:
 193 ; PRT: v_mov_b32_e32 v0, 0
 194 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 195 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 196 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 197 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 198 ; NOPRT: v_mov_b32_e32 v4, 0
 199 ; NOPRT-NOT: v_mov_b32_e32 v0
 200 ; NOPRT-NOT: v_mov_b32_e32 v1
 201 ; NOPRT-NOT: v_mov_b32_e32 v2
 202 ; NOPRT-NOT: v_mov_b32_e32 v3
 203 ; GCN: image_load v[0:7], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe da{{$}}
 204 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
 205 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
 206 define amdgpu_ps <4 x float> @load_2darray_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
 207 main_body:
 208   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
 209   %v.vec = extractvalue {<4 x float>, i32} %v, 0
 210   %v.err = extractvalue {<4 x float>, i32} %v, 1
 211   store i32 %v.err, i32 addrspace(1)* %out, align 4
 212   ret <4 x float> %v.vec
 213 }
 214
 215 ; GCN-LABEL: {{^}}load_2dmsaa:
 216 ; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
 217 define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
 218 main_body:
 219   %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
 220   ret <4 x float> %v
 221 }
 222
 223 ; GCN-LABEL: {{^}}load_2dmsaa_both:
 224 ; PRT: v_mov_b32_e32 v0, 0
 225 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 226 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 227 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 228 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 229 ; NOPRT: v_mov_b32_e32 v4, 0
 230 ; NOPRT-NOT: v_mov_b32_e32 v0
 231 ; NOPRT-NOT: v_mov_b32_e32 v1
 232 ; NOPRT-NOT: v_mov_b32_e32 v2
 233 ; NOPRT-NOT: v_mov_b32_e32 v3
 234 ; GCN: image_load v[0:7], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe lwe{{$}}
 235 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
 236 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
 237 define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
 238 main_body:
 239   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
 240   %v.vec = extractvalue {<4 x float>, i32} %v, 0
 241   %v.err = extractvalue {<4 x float>, i32} %v, 1
 242   store i32 %v.err, i32 addrspace(1)* %out, align 4
 243   ret <4 x float> %v.vec
 244 }
 245
 246 ; GCN-LABEL: {{^}}load_2darraymsaa:
 247 ; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
 248 define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
 249 main_body:
 250   %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
 251   ret <4 x float> %v
 252 }
 253
 254 ; GCN-LABEL: {{^}}load_2darraymsaa_tfe:
 255 ; PRT: v_mov_b32_e32 v0, 0
 256 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 257 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 258 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 259 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 260 ; NOPRT: v_mov_b32_e32 v4, 0
 261 ; NOPRT-NOT: v_mov_b32_e32 v0
 262 ; NOPRT-NOT: v_mov_b32_e32 v1
 263 ; NOPRT-NOT: v_mov_b32_e32 v2
 264 ; NOPRT-NOT: v_mov_b32_e32 v3
 265 ; GCN: image_load v[0:7], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe da{{$}}
 266 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
 267 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
 268 define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
 269 main_body:
 270   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
 271   %v.vec = extractvalue {<4 x float>, i32} %v, 0
 272   %v.err = extractvalue {<4 x float>, i32} %v, 1
 273   store i32 %v.err, i32 addrspace(1)* %out, align 4
 274   ret <4 x float> %v.vec
 275 }
 276
 277 ; GCN-LABEL: {{^}}load_mip_1d:
 278 ; GCN: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
 279 define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s, i32 %mip) {
 280 main_body:
 281   %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
 282   ret <4 x float> %v
 283 }
 284
 285 ; GCN-LABEL: {{^}}load_mip_1d_lwe:
 286 ; PRT: v_mov_b32_e32 v0, 0
 287 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 288 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 289 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 290 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 291 ; NOPRT: v_mov_b32_e32 v4, 0
 292 ; NOPRT-NOT: v_mov_b32_e32 v0
 293 ; NOPRT-NOT: v_mov_b32_e32 v1
 294 ; NOPRT-NOT: v_mov_b32_e32 v2
 295 ; NOPRT-NOT: v_mov_b32_e32 v3
 296 ; GCN: image_load_mip v[0:7], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe{{$}}
 297 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
 298 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
 299 define amdgpu_ps <4 x float> @load_mip_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %mip) {
 300 main_body:
 301   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.1d.v4f32i32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 2, i32 0)
 302   %v.vec = extractvalue {<4 x float>, i32} %v, 0
 303   %v.err = extractvalue {<4 x float>, i32} %v, 1
 304   store i32 %v.err, i32 addrspace(1)* %out, align 4
 305   ret <4 x float> %v.vec
 306 }
 307
 308 ; GCN-LABEL: {{^}}load_mip_2d:
 309 ; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
 310 define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
 311 main_body:
 312   %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
 313   ret <4 x float> %v
 314 }
 315
 316 ; GCN-LABEL: {{^}}load_mip_2d_tfe:
 317 ; PRT: v_mov_b32_e32 v0, 0
 318 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 319 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 320 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 321 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 322 ; NOPRT: v_mov_b32_e32 v4, 0
 323 ; NOPRT-NOT: v_mov_b32_e32 v0
 324 ; NOPRT-NOT: v_mov_b32_e32 v1
 325 ; NOPRT-NOT: v_mov_b32_e32 v2
 326 ; NOPRT-NOT: v_mov_b32_e32 v3
 327 ; GCN: image_load_mip v[0:7], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe{{$}}
 328 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
 329 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
 330 define amdgpu_ps <4 x float> @load_mip_2d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %mip) {
 331 main_body:
 332   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
 333   %v.vec = extractvalue {<4 x float>, i32} %v, 0
 334   %v.err = extractvalue {<4 x float>, i32} %v, 1
 335   store i32 %v.err, i32 addrspace(1)* %out, align 4
 336   ret <4 x float> %v.vec
 337 }
 338
 339 ; Make sure that error flag is returned even with dmask 0
 340 ; GCN-LABEL: {{^}}load_1d_V2_tfe_dmask0:
 341 ; GCN: v_mov_b32_e32 v1, 0
 342 ; PRT-DAG: v_mov_b32_e32 v2, v1
 343 ; PRT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe{{$}}
 344 ; NOPRT-NOT: v_mov_b32_e32 v1
 345 ; NOPRT: image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe{{$}}
 346 define amdgpu_ps float @load_1d_V2_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s) {
 347 main_body:
 348   %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32 0, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
 349   %v.err = extractvalue {<2 x float>, i32} %v, 1
 350   %vv = bitcast i32 %v.err to float
 351   ret float %vv
 352 }
 353
 354 ; GCN-LABEL: {{^}}load_1d_V1_tfe_dmask0:
 355 ; GCN: v_mov_b32_e32 v1, 0
 356 ; PRT-DAG: v_mov_b32_e32 v2, v1
 357 ; PRT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe{{$}}
 358 ; NOPRT-NOT: v_mov_b32_e32 v1
 359 ; NOPRT: image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe{{$}}
 360 define amdgpu_ps float @load_1d_V1_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s) {
 361 main_body:
 362   %v = call {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32 0, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
 363   %v.err = extractvalue {float, i32} %v, 1
 364   %vv = bitcast i32 %v.err to float
 365   ret float %vv
 366 }
 367
 368 ; GCN-LABEL: {{^}}load_mip_2d_tfe_dmask0:
 369 ; GCN: v_mov_b32_e32 v3, 0
 370 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v3
 371 ; PRT: image_load_mip v[3:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}}
 372 ; NOPRT-NOT: v_mov_b32_e32 v2
 373 ; NOPRT: image_load_mip v[2:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}}
 374 define amdgpu_ps float @load_mip_2d_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
 375 main_body:
 376   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 0, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
 377   %v.err = extractvalue {<4 x float>, i32} %v, 1
 378   %vv = bitcast i32 %v.err to float
 379   ret float %vv
 380 }
 381
 382 ; Do not make dmask 0 even if no result (other than tfe) is used.
 383 ; GCN-LABEL: {{^}}load_mip_2d_tfe_nouse:
 384 ; GCN: v_mov_b32_e32 v3, 0
 385 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v3
 386 ; PRT: image_load_mip v[3:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}}
 387 ; NOPRT-NOT: v_mov_b32_e32 v2
 388 ; NOPRT: image_load_mip v[2:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}}
 389 define amdgpu_ps float @load_mip_2d_tfe_nouse(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
 390 main_body:
 391   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
 392   %v.err = extractvalue {<4 x float>, i32} %v, 1
 393   %vv = bitcast i32 %v.err to float
 394   ret float %vv
 395 }
 396
 397 ; GCN-LABEL: {{^}}load_mip_2d_tfe_nouse_V2:
 398 ; GCN: v_mov_b32_e32 v3, 0
 399 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v3
 400 ; PRT: image_load_mip v[3:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}}
 401 ; NOPRT-NOT: v_mov_b32_e32 v2
 402 ; NOPRT: image_load_mip v[2:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}}
 403 define amdgpu_ps float @load_mip_2d_tfe_nouse_V2(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
 404 main_body:
 405   %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v2f32i32.i32(i32 6, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
 406   %v.err = extractvalue {<2 x float>, i32} %v, 1
 407   %vv = bitcast i32 %v.err to float
 408   ret float %vv
 409 }
 410
 411 ; GCN-LABEL: {{^}}load_mip_2d_tfe_nouse_V1:
 412 ; GCN: v_mov_b32_e32 v3, 0
 413 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v3
 414 ; PRT: image_load_mip v[3:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x2 unorm tfe{{$}}
 415 ; NOPRT-NOT: v_mov_b32_e32 v2
 416 ; NOPRT: image_load_mip v[2:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x2 unorm tfe{{$}}
 417 define amdgpu_ps float @load_mip_2d_tfe_nouse_V1(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
 418 main_body:
 419   %v = call {float, i32} @llvm.amdgcn.image.load.mip.2d.f32i32.i32(i32 2, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
 420   %v.err = extractvalue {float, i32} %v, 1
 421   %vv = bitcast i32 %v.err to float
 422   ret float %vv
 423 }
 424
 425 ; Check for dmask being materially smaller than return type
 426 ; GCN-LABEL: {{^}}load_1d_tfe_V4_dmask3:
 427 ; PRT: v_mov_b32_e32 v0, 0
 428 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 429 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 430 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 431 ; NOPRT: v_mov_b32_e32 v3, 0
 432 ; NOPRT-NOT: v_mov_b32_e32 v0
 433 ; NOPRT-NOT: v_mov_b32_e32 v1
 434 ; NOPRT-NOT: v_mov_b32_e32 v2
 435 ; GCN: image_load v[0:3], v{{[0-9]+}}, s[0:7] dmask:0x7 unorm tfe{{$}}
 436 ; SIVI: buffer_store_dword v3, off, s[8:11], 0
 437 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v3
 438 define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask3(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
 439 main_body:
 440   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
 441   %v.vec = extractvalue {<4 x float>, i32} %v, 0
 442   %v.err = extractvalue {<4 x float>, i32} %v, 1
 443   store i32 %v.err, i32 addrspace(1)* %out, align 4
 444   ret <4 x float> %v.vec
 445 }
 446
 447 ; GCN-LABEL: {{^}}load_1d_tfe_V4_dmask2:
 448 ; PRT: v_mov_b32_e32 v0, 0
 449 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 450 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 451 ; NOPRT: v_mov_b32_e32 v2, 0
 452 ; NOPRT-NOT: v_mov_b32_e32 v0
 453 ; NOPRT-NOT: v_mov_b32_e32 v1
 454 ; GCN: image_load v[0:3], v{{[0-9]+}}, s[0:7] dmask:0x6 unorm tfe{{$}}
 455 ; SIVI: buffer_store_dword v2, off, s[8:11], 0
 456 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v2
 457 define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask2(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
 458 main_body:
 459   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
 460   %v.vec = extractvalue {<4 x float>, i32} %v, 0
 461   %v.err = extractvalue {<4 x float>, i32} %v, 1
 462   store i32 %v.err, i32 addrspace(1)* %out, align 4
 463   ret <4 x float> %v.vec
 464 }
 465
 466 ; GCN-LABEL: {{^}}load_1d_tfe_V4_dmask1:
 467 ; PRT: v_mov_b32_e32 v0, 0
 468 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 469 ; NOPRT: v_mov_b32_e32 v1, 0
 470 ; NOPRT-NOT: v_mov_b32_e32 v0
 471 ; GCN: image_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 unorm tfe{{$}}
 472 ; SIVI: buffer_store_dword v1, off, s[8:11], 0
 473 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v1
 474 define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
 475 main_body:
 476   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
 477   %v.vec = extractvalue {<4 x float>, i32} %v, 0
 478   %v.err = extractvalue {<4 x float>, i32} %v, 1
 479   store i32 %v.err, i32 addrspace(1)* %out, align 4
 480   ret <4 x float> %v.vec
 481 }
 482
 483 ; GCN-LABEL: {{^}}load_1d_tfe_V2_dmask1:
 484 ; PRT: v_mov_b32_e32 v0, 0
 485 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
 486 ; NOPRT: v_mov_b32_e32 v1, 0
 487 ; NOPRT-NOT: v_mov_b32_e32 v0
 488 ; GCN: image_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 unorm tfe{{$}}
 489 ; SIVI: buffer_store_dword v1, off, s[8:11], 0
 490 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v1
 491 define amdgpu_ps <2 x float> @load_1d_tfe_V2_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
 492 main_body:
 493   %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
 494   %v.vec = extractvalue {<2 x float>, i32} %v, 0
 495   %v.err = extractvalue {<2 x float>, i32} %v, 1
 496   store i32 %v.err, i32 addrspace(1)* %out, align 4
 497   ret <2 x float> %v.vec
 498 }
 499
 500
 501 ; GCN-LABEL: {{^}}load_mip_3d:
 502 ; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
 503 define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r, i32 %mip) {
 504 main_body:
 505   %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
 506   ret <4 x float> %v
 507 }
 508
 509 ; GCN-LABEL: {{^}}load_mip_cube:
 510 ; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
 511 define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
 512 main_body:
 513   %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
 514   ret <4 x float> %v
 515 }
 516
 517 ; GCN-LABEL: {{^}}load_mip_1darray:
 518 ; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
 519 define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice, i32 %mip) {
 520 main_body:
 521   %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
 522   ret <4 x float> %v
 523 }
 524
 525 ; GCN-LABEL: {{^}}load_mip_2darray:
 526 ; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
 527 define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
 528 main_body:
 529   %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
 530   ret <4 x float> %v
 531 }
 532
 533 ; GCN-LABEL: {{^}}store_1d:
 534 ; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm{{$}}
 535 define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
 536 main_body:
 537   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
 538   ret void
 539 }
 540
 541 ; GCN-LABEL: {{^}}store_2d:
 542 ; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
 543 define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
 544 main_body:
 545   call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
 546   ret void
 547 }
 548
 549 ; GCN-LABEL: {{^}}store_3d:
 550 ; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
 551 define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r) {
 552 main_body:
 553   call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
 554   ret void
 555 }
 556
 557 ; GCN-LABEL: {{^}}store_cube:
 558 ; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
 559 define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
 560 main_body:
 561   call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
 562   ret void
 563 }
 564
 565 ; GCN-LABEL: {{^}}store_1darray:
 566 ; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da{{$}}
 567 define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice) {
 568 main_body:
 569   call void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
 570   ret void
 571 }
 572
 573 ; GCN-LABEL: {{^}}store_2darray:
 574 ; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
 575 define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
 576 main_body:
 577   call void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
 578   ret void
 579 }
 580
 581 ; GCN-LABEL: {{^}}store_2dmsaa:
 582 ; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
 583 define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %fragid) {
 584 main_body:
 585   call void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
 586   ret void
 587 }
 588
 589 ; GCN-LABEL: {{^}}store_2darraymsaa:
 590 ; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
 591 define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
 592 main_body:
 593   call void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
 594   ret void
 595 }
 596
 597 ; GCN-LABEL: {{^}}store_mip_1d:
 598 ; GCN: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
 599 define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %mip) {
 600 main_body:
 601   call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
 602   ret void
 603 }
 604
 605 ; GCN-LABEL: {{^}}store_mip_2d:
 606 ; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
 607 define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %mip) {
 608 main_body:
 609   call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
 610   ret void
 611 }
 612
 613 ; GCN-LABEL: {{^}}store_mip_3d:
 614 ; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
 615 define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r, i32 %mip) {
 616 main_body:
 617   call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
 618   ret void
 619 }
 620
 621 ; GCN-LABEL: {{^}}store_mip_cube:
 622 ; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
 623 define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) {
 624 main_body:
 625   call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
 626   ret void
 627 }
 628
 629 ; GCN-LABEL: {{^}}store_mip_1darray:
 630 ; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
 631 define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice, i32 %mip) {
 632 main_body:
 633   call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
 634   ret void
 635 }
 636
 637 ; GCN-LABEL: {{^}}store_mip_2darray:
 638 ; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
 639 define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) {
 640 main_body:
 641   call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
 642   ret void
 643 }
 644
 645 ; GCN-LABEL: {{^}}getresinfo_1d:
 646 ; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
 647 define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, i32 %mip) {
 648 main_body:
 649   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
 650   ret <4 x float> %v
 651 }
 652
 653 ; GCN-LABEL: {{^}}getresinfo_2d:
 654 ; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
 655 define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, i32 %mip) {
 656 main_body:
 657   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
 658   ret <4 x float> %v
 659 }
 660
 661 ; GCN-LABEL: {{^}}getresinfo_3d:
 662 ; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
 663 define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, i32 %mip) {
 664 main_body:
 665   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
 666   ret <4 x float> %v
 667 }
 668
 669 ; GCN-LABEL: {{^}}getresinfo_cube:
 670 ; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
 671 define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, i32 %mip) {
 672 main_body:
 673   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
 674   ret <4 x float> %v
 675 }
 676
 677 ; GCN-LABEL: {{^}}getresinfo_1darray:
 678 ; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
 679 define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, i32 %mip) {
 680 main_body:
 681   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
 682   ret <4 x float> %v
 683 }
 684
 685 ; GCN-LABEL: {{^}}getresinfo_2darray:
 686 ; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
 687 define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, i32 %mip) {
 688 main_body:
 689   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
 690   ret <4 x float> %v
 691 }
 692
 693 ; GCN-LABEL: {{^}}getresinfo_2dmsaa:
 694 ; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
 695 define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, i32 %mip) {
 696 main_body:
 697   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
 698   ret <4 x float> %v
 699 }
 700
 701 ; GCN-LABEL: {{^}}getresinfo_2darraymsaa:
 702 ; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
 703 define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, i32 %mip) {
 704 main_body:
 705   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
 706   ret <4 x float> %v
 707 }
 708
 709 ; GCN-LABEL: {{^}}load_1d_V1:
 710 ; GCN: image_load v0, v0, s[0:7] dmask:0x8 unorm{{$}}
 711 define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, i32 %s) {
 712 main_body:
 713   %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
 714   ret float %v
 715 }
 716
 717 ; GCN-LABEL: {{^}}load_1d_V2:
 718 ; GCN: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm{{$}}
 719 define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, i32 %s) {
 720 main_body:
 721   %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
 722   ret <2 x float> %v
 723 }
 724
 725 ; GCN-LABEL: {{^}}store_1d_V1:
 726 ; GCN: image_store v0, v1, s[0:7] dmask:0x2 unorm{{$}}
 727 define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, i32 %s) {
 728 main_body:
 729   call void @llvm.amdgcn.image.store.1d.f32.i32(float %vdata, i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
 730   ret void
 731 }
 732
 733 ; GCN-LABEL: {{^}}store_1d_V2:
 734 ; GCN: image_store v[0:1], v2, s[0:7] dmask:0xc unorm{{$}}
 735 define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, i32 %s) {
 736 main_body:
 737   call void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float> %vdata, i32 12, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
 738   ret void
 739 }
 740
 741 ; GCN-LABEL: {{^}}load_1d_glc:
 742 ; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc{{$}}
 743 define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, i32 %s) {
 744 main_body:
 745   %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
 746   ret <4 x float> %v
 747 }
 748
 749 ; GCN-LABEL: {{^}}load_1d_slc:
 750 ; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc{{$}}
 751 define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, i32 %s) {
 752 main_body:
 753   %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
 754   ret <4 x float> %v
 755 }
 756
 757 ; GCN-LABEL: {{^}}load_1d_glc_slc:
 758 ; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc{{$}}
 759 define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, i32 %s) {
 760 main_body:
 761   %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
 762   ret <4 x float> %v
 763 }
 764
 765 ; GCN-LABEL: {{^}}store_1d_glc:
 766 ; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc{{$}}
 767 define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
 768 main_body:
 769   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
 770   ret void
 771 }
 772
 773 ; GCN-LABEL: {{^}}store_1d_slc:
 774 ; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc{{$}}
 775 define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
 776 main_body:
 777   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
 778   ret void
 779 }
 780
 781 ; GCN-LABEL: {{^}}store_1d_glc_slc:
 782 ; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc{{$}}
 783 define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
 784 main_body:
 785   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
 786   ret void
 787 }
 788
 789 ; GCN-LABEL: {{^}}getresinfo_dmask0:
 790 ; GCN-NOT: image
 791 ; GCN: ; return to shader part epilog
 792 define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %mip) #0 {
 793 main_body:
 794   %r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 0, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
 795   ret <4 x float> %r
 796 }
 797
 798 ; Ideally, the register allocator would avoid the wait here
 799 ;
 800 ; GCN-LABEL: {{^}}image_store_wait:
 801 ; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
 802 ; SI: s_waitcnt expcnt(0)
 803 ; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
 804 ; GCN: s_waitcnt vmcnt(0)
 805 ; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
 806 define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
 807 main_body:
 808   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %arg3, i32 15, i32 %arg4, <8 x i32> %arg, i32 0, i32 0)
 809   %data = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %arg4, <8 x i32> %arg1, i32 0, i32 0)
 810   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %data, i32 15, i32 %arg4, <8 x i32> %arg2, i32 0, i32 0)
 811   ret void
 812 }
 813
 814 ; SI won't merge ds memory operations, because of the signed offset bug, so
 815 ; we only have check lines for VI.
 816 ; VI-LABEL: image_load_mmo
 817 ; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
 818 ; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
 819 define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 {
 820   store float 0.000000e+00, float addrspace(3)* %lds
 821   %c0 = extractelement <2 x i32> %c, i32 0
 822   %c1 = extractelement <2 x i32> %c, i32 1
 823   %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 %c0, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
 824   %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
 825   store float 0.000000e+00, float addrspace(3)* %tmp2
 826   ret float %tex
 827 }
 828
 829 declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
 830 declare {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
 831 declare {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
 832 declare {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
 833 declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
 834 declare {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
 835 declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 836 declare {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 837 declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 838 declare {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 839 declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
 840 declare {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
 841 declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 842 declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 843 declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 844 declare {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 845 declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 846 declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 847
 848 declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
 849 declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 850 declare {<4 x float>,i32} @llvm.amdgcn.image.load.mip.1d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
 851 declare {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 852 declare {<2 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v2f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 853 declare {float,i32} @llvm.amdgcn.image.load.mip.2d.f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 854 declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 855 declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 856 declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 857 declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 858
 859 declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0
 860 declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
 861 declare void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
 862 declare void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
 863 declare void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
 864 declare void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
 865 declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
 866 declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
 867
 868 declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
 869 declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
 870 declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
 871 declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
 872 declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
 873 declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
 874
 875 declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
 876 declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
 877 declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
 878 declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
 879 declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
 880 declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
 881 declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
 882 declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
 883
 884 declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1
 885 declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
 886 declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1
 887 declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) #0
 888 declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) #0
 889
 890 attributes #0 = { nounwind }
 891 attributes #1 = { nounwind readonly }
 892 attributes #2 = { nounwind readnone }