llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.d16.ll

   1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
   2 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
   3 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
   4
   5 ; GCN-LABEL: {{^}}load.f16.1d:
   6 ; GFX9: image_load v0, v0, s[0:7] dmask:0x1 unorm a16 d16
   7 ; GFX10: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm a16 d16
     ; 1D image load taking an i16 coordinate and returning <4 x half>, with the
     ; leading i32 operand (shown as dmask in the checks above) set to 1.
     ; The checks expect both the result and the coordinate in v0.
   8 define amdgpu_ps <4 x half> @load.f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
   9 entry:
  10   %s = extractelement <2 x i16> %coords, i32 0 ; only element 0 of %coords is used
  11   %texel = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32 1, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  12   ret <4 x half> %texel
  13 }
  14
  15 ; GCN-LABEL: {{^}}load.v2f16.1d:
  16 ; GFX9: image_load v0, v0, s[0:7] dmask:0x3 unorm a16 d16
  17 ; GFX10: image_load v0, v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm a16 d16
     ; 1D image load taking an i16 coordinate and returning <4 x half>, with the
     ; leading i32 operand (shown as dmask in the checks above) set to 3.
     ; The checks expect a single result register (v0).
  18 define amdgpu_ps <4 x half> @load.v2f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  19 entry:
  20   %s = extractelement <2 x i16> %coords, i32 0 ; only element 0 of %coords is used
  21   %texel = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32 3, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  22   ret <4 x half> %texel
  23 }
  24
  25 ; GCN-LABEL: {{^}}load.v3f16.1d:
  26 ; GFX9: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm a16 d16
  27 ; GFX10: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm a16 d16
     ; 1D image load taking an i16 coordinate and returning <4 x half>, with the
     ; leading i32 operand (shown as dmask in the checks above) set to 7.
     ; The checks expect the result in the register pair v[0:1].
  28 define amdgpu_ps <4 x half> @load.v3f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  29 entry:
  30   %s = extractelement <2 x i16> %coords, i32 0 ; only element 0 of %coords is used
  31   %texel = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32 7, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  32   ret <4 x half> %texel
  33 }
  34
  35 ; GCN-LABEL: {{^}}load.v4f16.1d:
  36 ; GFX9: image_load v[0:1], v0, s[0:7] dmask:0xf unorm a16 d16
  37 ; GFX10: image_load v[0:1], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16 d16
     ; 1D image load taking an i16 coordinate and returning <4 x half>, with the
     ; leading i32 operand (shown as dmask in the checks above) set to 15.
     ; The checks expect the result in the register pair v[0:1].
  38 define amdgpu_ps <4 x half> @load.v4f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  39 entry:
  40   %s = extractelement <2 x i16> %coords, i32 0 ; only element 0 of %coords is used
  41   %texel = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  42   ret <4 x half> %texel
  43 }
  44
  45 ; GCN-LABEL: {{^}}load.f16.2d:
  46 ; GFX9: image_load v0, v0, s[0:7] dmask:0x1 unorm a16 d16
  47 ; GFX10: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm a16 d16
     ; 2D image load taking two i16 coordinates and returning <4 x half>, with the
     ; leading i32 operand (shown as dmask in the checks above) set to 1.
     ; Both coordinates come from the single <2 x i16> argument, and the checks
     ; expect one address register (v0) and one result register (v0).
  48 define amdgpu_ps <4 x half> @load.f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  49 entry:
  50   %s = extractelement <2 x i16> %coords, i32 0
  51   %t = extractelement <2 x i16> %coords, i32 1
  52   %texel = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32 1, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
  53   ret <4 x half> %texel
  54 }
  55
  56 ; GCN-LABEL: {{^}}load.v2f16.2d:
  57 ; GFX9: image_load v0, v0, s[0:7] dmask:0x3 unorm a16 d16
  58 ; GFX10: image_load v0, v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm a16 d16
     ; 2D image load taking two i16 coordinates and returning <4 x half>, with the
     ; leading i32 operand (shown as dmask in the checks above) set to 3.
     ; Both coordinates come from the single <2 x i16> argument, and the checks
     ; expect one address register (v0) and one result register (v0).
  59 define amdgpu_ps <4 x half> @load.v2f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  60 entry:
  61   %s = extractelement <2 x i16> %coords, i32 0
  62   %t = extractelement <2 x i16> %coords, i32 1
  63   %texel = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32 3, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
  64   ret <4 x half> %texel
  65 }
  66
  67 ; GCN-LABEL: {{^}}load.v3f16.2d:
  68 ; GFX9: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm a16 d16
  69 ; GFX10: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm a16 d16
     ; 2D image load taking two i16 coordinates and returning <4 x half>, with the
     ; leading i32 operand (shown as dmask in the checks above) set to 7.
     ; Both coordinates come from the single <2 x i16> argument; the checks expect
     ; one address register (v0) and the result in the register pair v[0:1].
  70 define amdgpu_ps <4 x half> @load.v3f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  71 entry:
  72   %s = extractelement <2 x i16> %coords, i32 0
  73   %t = extractelement <2 x i16> %coords, i32 1
  74   %texel = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32 7, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
  75   ret <4 x half> %texel
  76 }
  77
  78 ; GCN-LABEL: {{^}}load.v4f16.2d:
  79 ; GFX9: image_load v[0:1], v0, s[0:7] dmask:0xf unorm a16 d16
  80 ; GFX10: image_load v[0:1], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16 d16
     ; 2D image load taking two i16 coordinates and returning <4 x half>, with the
     ; leading i32 operand (shown as dmask in the checks above) set to 15.
     ; Both coordinates come from the single <2 x i16> argument; the checks expect
     ; one address register (v0) and the result in the register pair v[0:1].
  81 define amdgpu_ps <4 x half> @load.v4f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  82 entry:
  83   %s = extractelement <2 x i16> %coords, i32 0
  84   %t = extractelement <2 x i16> %coords, i32 1
  85   %texel = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
  86   ret <4 x half> %texel
  87 }
  88
  89 ; GCN-LABEL: {{^}}load.f16.3d:
  90 ; GFX9: image_load v0, v[0:1], s[0:7] dmask:0x1 unorm a16 d16
  91 ; GFX10: image_load v0, v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm a16 d16
     ; 3D image load taking three i16 coordinates and returning <4 x half>, with
     ; the leading i32 operand (shown as dmask in the checks above) set to 1.
     ; The checks expect the address in v[0:1] and the result in v0.
  92 define amdgpu_ps <4 x half> @load.f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  93 entry:
  94   %s = extractelement <2 x i16> %coords_lo, i32 0
  95   %t = extractelement <2 x i16> %coords_lo, i32 1
  96   %r = extractelement <2 x i16> %coords_hi, i32 0 ; element 1 of %coords_hi is unused
  97   %texel = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32 1, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
  98   ret <4 x half> %texel
  99 }
 100
 101 ; GCN-LABEL: {{^}}load.v2f16.3d:
 102 ; GFX9: image_load v0, v[0:1], s[0:7] dmask:0x3 unorm a16 d16
 103 ; GFX10: image_load v0, v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm a16 d16
     ; 3D image load taking three i16 coordinates and returning <4 x half>, with
     ; the leading i32 operand (shown as dmask in the checks above) set to 3.
     ; The checks expect the address in v[0:1] and the result in v0.
 104 define amdgpu_ps <4 x half> @load.v2f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
 105 entry:
 106   %s = extractelement <2 x i16> %coords_lo, i32 0
 107   %t = extractelement <2 x i16> %coords_lo, i32 1
 108   %r = extractelement <2 x i16> %coords_hi, i32 0 ; element 1 of %coords_hi is unused
 109   %texel = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32 3, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
 110   ret <4 x half> %texel
 111 }
 112
 113 ; GCN-LABEL: {{^}}load.v3f16.3d:
 114 ; GFX9: image_load v[0:1], v[0:1], s[0:7] dmask:0x7 unorm a16 d16
 115 ; GFX10: image_load v[0:1], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D unorm a16 d16
     ; 3D image load taking three i16 coordinates and returning <4 x half>, with
     ; the leading i32 operand (shown as dmask in the checks above) set to 7.
     ; The checks expect both the address and the result in v[0:1].
 116 define amdgpu_ps <4 x half> @load.v3f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
 117 entry:
 118   %s = extractelement <2 x i16> %coords_lo, i32 0
 119   %t = extractelement <2 x i16> %coords_lo, i32 1
 120   %r = extractelement <2 x i16> %coords_hi, i32 0 ; element 1 of %coords_hi is unused
 121   %texel = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32 7, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
 122   ret <4 x half> %texel
 123 }
 124
 125 ; GCN-LABEL: {{^}}load.v4f16.3d:
 126 ; GFX9: image_load v[0:1], v[0:1], s[0:7] dmask:0xf unorm a16 d16
 127 ; GFX10: image_load v[0:1], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 d16
     ; 3D image load taking three i16 coordinates and returning <4 x half>, with
     ; the leading i32 operand (shown as dmask in the checks above) set to 15.
     ; The checks expect both the address and the result in v[0:1].
 128 define amdgpu_ps <4 x half> @load.v4f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
 129 entry:
 130   %s = extractelement <2 x i16> %coords_lo, i32 0
 131   %t = extractelement <2 x i16> %coords_lo, i32 1
 132   %r = extractelement <2 x i16> %coords_hi, i32 0 ; element 1 of %coords_hi is unused
 133   %texel = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
 134   ret <4 x half> %texel
 135 }
 136
 137 declare <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32, i16, <8 x i32>, i32, i32) #2 ; 1D overload: one i16 coordinate
 138 declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32, i16, i16, <8 x i32>, i32, i32) #2 ; 2D overload: two i16 coordinates
 139 declare <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #2 ; 3D overload: three i16 coordinates
     ; NOTE(review): the declarations above reference attribute group #2, but only
     ; groups #0 and #1 are defined in the portion of the file visible here.
     ; Confirm that #2 is defined further down, or point the declarations at the
     ; intended group.
 140
     ; Attribute groups. No function definition visible in this file references
     ; #0 or #1 directly.
 141 attributes #0 = { nounwind }
 142 attributes #1 = { nounwind readonly }