llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.ll

   1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
   2 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
   3
   4 ; GCN-LABEL: {{^}}load.f32.1d:
   5 ; GFX9: image_load v0, v0, s[0:7] dmask:0x1 unorm a16
   6 ; GFX10: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm a16
   7 define amdgpu_ps <4 x float> @load.f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { ; 1D image load, i16 coordinate, dmask operand 1
   8 entry:
   9   %coord.x = extractelement <2 x i16> %coords, i32 0 ; only element 0 of the packed pair is used
  10   %texel = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 1, i16 %coord.x, <8 x i32> %rsrc, i32 0, i32 0)
  11   ret <4 x float> %texel
  12 }
  13
  14 ; GCN-LABEL: {{^}}load.v2f32.1d:
  15 ; GFX9: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm a16
  16 ; GFX10: image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm a16
  17 define amdgpu_ps <4 x float> @load.v2f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { ; 1D image load, i16 coordinate, dmask operand 3
  18 entry:
  19   %coord.x = extractelement <2 x i16> %coords, i32 0 ; only element 0 of the packed pair is used
  20   %texel = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 3, i16 %coord.x, <8 x i32> %rsrc, i32 0, i32 0)
  21   ret <4 x float> %texel
  22 }
  23
  24 ; GCN-LABEL: {{^}}load.v3f32.1d:
  25 ; GFX9: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm a16
  26 ; GFX10: image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm a16
  27 define amdgpu_ps <4 x float> @load.v3f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { ; 1D image load, i16 coordinate, dmask operand 7
  28 entry:
  29   %coord.x = extractelement <2 x i16> %coords, i32 0 ; only element 0 of the packed pair is used
  30   %texel = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 7, i16 %coord.x, <8 x i32> %rsrc, i32 0, i32 0)
  31   ret <4 x float> %texel
  32 }
  33
  34 ; GCN-LABEL: {{^}}load.v4f32.1d:
  35 ; GFX9: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16
  36 ; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
  37 define amdgpu_ps <4 x float> @load.v4f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { ; 1D image load, i16 coordinate, dmask operand 15
  38 entry:
  39   %coord.x = extractelement <2 x i16> %coords, i32 0 ; only element 0 of the packed pair is used
  40   %texel = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %coord.x, <8 x i32> %rsrc, i32 0, i32 0)
  41   ret <4 x float> %texel
  42 }
  43
  44 ; GCN-LABEL: {{^}}load.f32.2d:
  45 ; GFX9: image_load v0, v0, s[0:7] dmask:0x1 unorm a16
  46 ; GFX10: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm a16
  47 define amdgpu_ps <4 x float> @load.f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { ; 2D image load, i16 coordinates, dmask operand 1
  48 entry:
  49   %coord.x = extractelement <2 x i16> %coords, i32 0 ; x comes from element 0
  50   %coord.y = extractelement <2 x i16> %coords, i32 1 ; y comes from element 1 of the same vector
  51   %texel = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 1, i16 %coord.x, i16 %coord.y, <8 x i32> %rsrc, i32 0, i32 0)
  52   ret <4 x float> %texel
  53 }
  54
  55 ; GCN-LABEL: {{^}}load.v2f32.2d:
  56 ; GFX9: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm a16
  57 ; GFX10: image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm a16
  58 define amdgpu_ps <4 x float> @load.v2f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { ; 2D image load, i16 coordinates, dmask operand 3
  59 entry:
  60   %coord.x = extractelement <2 x i16> %coords, i32 0 ; x comes from element 0
  61   %coord.y = extractelement <2 x i16> %coords, i32 1 ; y comes from element 1 of the same vector
  62   %texel = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 3, i16 %coord.x, i16 %coord.y, <8 x i32> %rsrc, i32 0, i32 0)
  63   ret <4 x float> %texel
  64 }
  65
  66 ; GCN-LABEL: {{^}}load.v3f32.2d:
  67 ; GFX9: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm a16
  68 ; GFX10: image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm a16
  69 define amdgpu_ps <4 x float> @load.v3f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { ; 2D image load, i16 coordinates, dmask operand 7
  70 entry:
  71   %coord.x = extractelement <2 x i16> %coords, i32 0 ; x comes from element 0
  72   %coord.y = extractelement <2 x i16> %coords, i32 1 ; y comes from element 1 of the same vector
  73   %texel = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 7, i16 %coord.x, i16 %coord.y, <8 x i32> %rsrc, i32 0, i32 0)
  74   ret <4 x float> %texel
  75 }
  76
  77 ; GCN-LABEL: {{^}}load.v4f32.2d:
  78 ; GFX9: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16
  79 ; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
  80 define amdgpu_ps <4 x float> @load.v4f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { ; 2D image load, i16 coordinates, dmask operand 15
  81 entry:
  82   %coord.x = extractelement <2 x i16> %coords, i32 0 ; x comes from element 0
  83   %coord.y = extractelement <2 x i16> %coords, i32 1 ; y comes from element 1 of the same vector
  84   %texel = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 %coord.x, i16 %coord.y, <8 x i32> %rsrc, i32 0, i32 0)
  85   ret <4 x float> %texel
  86 }
  87
  88 ; GCN-LABEL: {{^}}load.f32.3d:
  89 ; GFX9: image_load v0, v[0:1], s[0:7] dmask:0x1 unorm a16
  90 ; GFX10: image_load v0, v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm a16
  91 define amdgpu_ps <4 x float> @load.f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { ; 3D image load, i16 coordinates, dmask operand 1
  92 entry:
  93   %coord.x = extractelement <2 x i16> %coords_lo, i32 0 ; x and y share the first coordinate vector
  94   %coord.y = extractelement <2 x i16> %coords_lo, i32 1
  95   %coord.z = extractelement <2 x i16> %coords_hi, i32 0 ; z is element 0 of the second vector; its element 1 is unused
  96   %texel = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 1, i16 %coord.x, i16 %coord.y, i16 %coord.z, <8 x i32> %rsrc, i32 0, i32 0)
  97   ret <4 x float> %texel
  98 }
  99
 100 ; GCN-LABEL: {{^}}load.v2f32.3d:
 101 ; GFX9: image_load v[0:1], v[0:1], s[0:7] dmask:0x3 unorm a16
 102 ; GFX10: image_load v[0:1], v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm a16
 103 define amdgpu_ps <4 x float> @load.v2f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { ; 3D image load, i16 coordinates, dmask operand 3
 104 entry:
 105   %coord.x = extractelement <2 x i16> %coords_lo, i32 0 ; x and y share the first coordinate vector
 106   %coord.y = extractelement <2 x i16> %coords_lo, i32 1
 107   %coord.z = extractelement <2 x i16> %coords_hi, i32 0 ; z is element 0 of the second vector; its element 1 is unused
 108   %texel = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 3, i16 %coord.x, i16 %coord.y, i16 %coord.z, <8 x i32> %rsrc, i32 0, i32 0)
 109   ret <4 x float> %texel
 110 }
 111
 112 ; GCN-LABEL: {{^}}load.v3f32.3d:
 113 ; GFX9: image_load v[0:2], v[0:1], s[0:7] dmask:0x7 unorm a16
 114 ; GFX10: image_load v[0:2], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D unorm a16
 115 define amdgpu_ps <4 x float> @load.v3f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { ; 3D image load, i16 coordinates, dmask operand 7
 116 entry:
 117   %coord.x = extractelement <2 x i16> %coords_lo, i32 0 ; x and y share the first coordinate vector
 118   %coord.y = extractelement <2 x i16> %coords_lo, i32 1
 119   %coord.z = extractelement <2 x i16> %coords_hi, i32 0 ; z is element 0 of the second vector; its element 1 is unused
 120   %texel = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 7, i16 %coord.x, i16 %coord.y, i16 %coord.z, <8 x i32> %rsrc, i32 0, i32 0)
 121   ret <4 x float> %texel
 122 }
 123
 124 ; GCN-LABEL: {{^}}load.v4f32.3d:
 125 ; GFX9: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
 126 ; GFX10: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
 127 define amdgpu_ps <4 x float> @load.v4f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { ; 3D image load, i16 coordinates, dmask operand 15
 128 entry:
 129   %coord.x = extractelement <2 x i16> %coords_lo, i32 0 ; x and y share the first coordinate vector
 130   %coord.y = extractelement <2 x i16> %coords_lo, i32 1
 131   %coord.z = extractelement <2 x i16> %coords_hi, i32 0 ; z is element 0 of the second vector; its element 1 is unused
 132   %texel = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 15, i16 %coord.x, i16 %coord.y, i16 %coord.z, <8 x i32> %rsrc, i32 0, i32 0)
 133   ret <4 x float> %texel
 134 }
 135
 136 declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #1 ; operands as used in this file: dmask, x, resource descriptor, two i32 values always passed as 0
 137 declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1 ; same layout with x, y coordinates
 138 declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1 ; same layout with x, y, z coordinates
 139
 140 attributes #0 = { nounwind } ; not referenced anywhere in the visible part of this file
 141 attributes #1 = { nounwind readonly } ; attribute group for the image-load intrinsic declarations (previously they named an undefined group)