1 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
2 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
4 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
; load.f32.1d -- 1D image load addressed by a single 16-bit coordinate.
; %x is element 0 of %coords and the intrinsic is called with dmask = 1.
; Every configuration is expected to emit image_load with one result register
; (v0), one address register (v0), dmask:0x1 and the a16 modifier. The GFX9
; pattern has no dim operand and the GFX12 pattern has no unorm.
; NOTE(review): the name says f32, but the intrinsic and the declared return
; type are <4 x float>; only the dmask narrows the expected result registers.
6 ; GCN-LABEL: {{^}}load.f32.1d:
7 ; GFX9: image_load v0, v0, s[0:7] dmask:0x1 unorm a16
8 ; GFX10: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm a16
9 ; GFX12: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D a16
10 define amdgpu_ps <4 x float> @load.f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
12 %x = extractelement <2 x i16> %coords, i32 0
13 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 1, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
; load.v2f32.1d -- 1D a16 image load with two channels enabled (dmask = 3).
; %x is element 0 of %coords. The patterns expect a two-register result
; (v[0:1]), a single address register (v0), dmask:0x3 and the a16 modifier.
; The GFX9 pattern has no dim operand and the GFX12 pattern has no unorm.
; NOTE(review): the name says v2f32, but the intrinsic and the declared return
; type are <4 x float>; only the dmask narrows the expected result registers.
17 ; GCN-LABEL: {{^}}load.v2f32.1d:
18 ; GFX9: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm a16
19 ; GFX10: image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm a16
20 ; GFX12: image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
21 define amdgpu_ps <4 x float> @load.v2f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
23 %x = extractelement <2 x i16> %coords, i32 0
24 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 3, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
; load.v3f32.1d -- 1D a16 image load with three channels enabled (dmask = 7).
; %x is element 0 of %coords. The patterns expect a three-register result
; (v[0:2]), a single address register (v0), dmask:0x7 and the a16 modifier.
; The GFX9 pattern has no dim operand and the GFX12 pattern has no unorm.
; NOTE(review): the name says v3f32, but the intrinsic and the declared return
; type are <4 x float>; only the dmask narrows the expected result registers.
28 ; GCN-LABEL: {{^}}load.v3f32.1d:
29 ; GFX9: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm a16
30 ; GFX10: image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm a16
31 ; GFX12: image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D a16
32 define amdgpu_ps <4 x float> @load.v3f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
34 %x = extractelement <2 x i16> %coords, i32 0
35 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 7, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
; load.v4f32.1d -- 1D a16 image load with all four channels enabled
; (dmask = 15). %x is element 0 of %coords. The patterns expect a
; four-register result (v[0:3]), a single address register (v0), dmask:0xf and
; the a16 modifier. The GFX9 pattern has no dim operand and the GFX12 pattern
; has no unorm.
39 ; GCN-LABEL: {{^}}load.v4f32.1d:
40 ; GFX9: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16
41 ; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
42 ; GFX12: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D a16
43 define amdgpu_ps <4 x float> @load.v4f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
45 %x = extractelement <2 x i16> %coords, i32 0
46 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
; load.f32.2d -- 2D image load addressed by two 16-bit coordinates, with one
; channel enabled (dmask = 1). %x and %y are elements 0 and 1 of %coords.
; The patterns expect a single address register (v0) even though two
; coordinates are passed -- presumably both halves of the incoming <2 x i16>
; are used as-is; confirm against the backend. The expected result is v0 with
; dmask:0x1 and the a16 modifier. The GFX9 pattern has no dim operand and the
; GFX12 pattern has no unorm.
; NOTE(review): the name says f32, but the intrinsic and the declared return
; type are <4 x float>; only the dmask narrows the expected result registers.
50 ; GCN-LABEL: {{^}}load.f32.2d:
51 ; GFX9: image_load v0, v0, s[0:7] dmask:0x1 unorm a16
52 ; GFX10: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm a16
53 ; GFX12: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
54 define amdgpu_ps <4 x float> @load.f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
56 %x = extractelement <2 x i16> %coords, i32 0
57 %y = extractelement <2 x i16> %coords, i32 1
58 %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 1, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
; load.v2f32.2d -- 2D a16 image load with two channels enabled (dmask = 3).
; %x and %y are elements 0 and 1 of %coords. The patterns expect a
; two-register result (v[0:1]), a single address register (v0), dmask:0x3 and
; the a16 modifier. The GFX9 pattern has no dim operand and the GFX12 pattern
; has no unorm.
; NOTE(review): the name says v2f32, but the intrinsic and the declared return
; type are <4 x float>; only the dmask narrows the expected result registers.
62 ; GCN-LABEL: {{^}}load.v2f32.2d:
63 ; GFX9: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm a16
64 ; GFX10: image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm a16
65 ; GFX12: image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D a16
66 define amdgpu_ps <4 x float> @load.v2f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
68 %x = extractelement <2 x i16> %coords, i32 0
69 %y = extractelement <2 x i16> %coords, i32 1
70 %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 3, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
; load.v3f32.2d -- 2D a16 image load with three channels enabled (dmask = 7).
; %x and %y are elements 0 and 1 of %coords. The patterns expect a
; three-register result (v[0:2]), a single address register (v0), dmask:0x7
; and the a16 modifier. The GFX9 pattern has no dim operand and the GFX12
; pattern has no unorm.
; NOTE(review): the name says v3f32, but the intrinsic and the declared return
; type are <4 x float>; only the dmask narrows the expected result registers.
74 ; GCN-LABEL: {{^}}load.v3f32.2d:
75 ; GFX9: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm a16
76 ; GFX10: image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm a16
77 ; GFX12: image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D a16
78 define amdgpu_ps <4 x float> @load.v3f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
80 %x = extractelement <2 x i16> %coords, i32 0
81 %y = extractelement <2 x i16> %coords, i32 1
82 %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 7, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
; load.v4f32.2d -- 2D a16 image load with all four channels enabled
; (dmask = 15). %x and %y are elements 0 and 1 of %coords. The patterns expect
; a four-register result (v[0:3]), a single address register (v0), dmask:0xf
; and the a16 modifier. The GFX9 pattern has no dim operand and the GFX12
; pattern has no unorm.
86 ; GCN-LABEL: {{^}}load.v4f32.2d:
87 ; GFX9: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16
88 ; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
89 ; GFX12: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D a16
90 define amdgpu_ps <4 x float> @load.v4f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
92 %x = extractelement <2 x i16> %coords, i32 0
93 %y = extractelement <2 x i16> %coords, i32 1
94 %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
; load.f32.3d -- 3D image load addressed by three 16-bit coordinates, with one
; channel enabled (dmask = 1). %x and %y are elements 0 and 1 of %coords_lo;
; %z is element 0 of %coords_hi (element 1 of %coords_hi is not read here).
; The address is expected to occupy two registers: the GFX9 and GFX10 patterns
; spell it as the range v[0:1], the GFX12 pattern as the list [v0, v1].
; The expected result is v0 with dmask:0x1 and the a16 modifier. The GFX9
; pattern has no dim operand and the GFX12 pattern has no unorm.
; NOTE(review): the name says f32, but the intrinsic and the declared return
; type are <4 x float>; only the dmask narrows the expected result registers.
98 ; GCN-LABEL: {{^}}load.f32.3d:
99 ; GFX9: image_load v0, v[0:1], s[0:7] dmask:0x1 unorm a16
100 ; GFX10: image_load v0, v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm a16
101 ; GFX12: image_load v0, [v0, v1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D a16
102 define amdgpu_ps <4 x float> @load.f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
104 %x = extractelement <2 x i16> %coords_lo, i32 0
105 %y = extractelement <2 x i16> %coords_lo, i32 1
106 %z = extractelement <2 x i16> %coords_hi, i32 0
107 %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 1, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
; load.v2f32.3d -- 3D a16 image load with two channels enabled (dmask = 3).
; %x and %y are elements 0 and 1 of %coords_lo; %z is element 0 of %coords_hi.
; The patterns expect a two-register result (v[0:1]) and a two-register
; address, written v[0:1] in the GFX9 and GFX10 patterns and [v0, v1] in the
; GFX12 pattern, plus dmask:0x3 and the a16 modifier. The GFX9 pattern has no
; dim operand and the GFX12 pattern has no unorm.
; NOTE(review): the name says v2f32, but the intrinsic and the declared return
; type are <4 x float>; only the dmask narrows the expected result registers.
111 ; GCN-LABEL: {{^}}load.v2f32.3d:
112 ; GFX9: image_load v[0:1], v[0:1], s[0:7] dmask:0x3 unorm a16
113 ; GFX10: image_load v[0:1], v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm a16
114 ; GFX12: image_load v[0:1], [v0, v1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
115 define amdgpu_ps <4 x float> @load.v2f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
117 %x = extractelement <2 x i16> %coords_lo, i32 0
118 %y = extractelement <2 x i16> %coords_lo, i32 1
119 %z = extractelement <2 x i16> %coords_hi, i32 0
120 %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 3, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
; load.v3f32.3d -- 3D a16 image load with three channels enabled (dmask = 7).
; %x and %y are elements 0 and 1 of %coords_lo; %z is element 0 of %coords_hi.
; The patterns expect a three-register result (v[0:2]) and a two-register
; address, written v[0:1] in the GFX9 and GFX10 patterns and [v0, v1] in the
; GFX12 pattern, plus dmask:0x7 and the a16 modifier. The GFX9 pattern has no
; dim operand and the GFX12 pattern has no unorm.
; NOTE(review): the name says v3f32, but the intrinsic and the declared return
; type are <4 x float>; only the dmask narrows the expected result registers.
124 ; GCN-LABEL: {{^}}load.v3f32.3d:
125 ; GFX9: image_load v[0:2], v[0:1], s[0:7] dmask:0x7 unorm a16
126 ; GFX10: image_load v[0:2], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D unorm a16
127 ; GFX12: image_load v[0:2], [v0, v1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D a16
128 define amdgpu_ps <4 x float> @load.v3f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
130 %x = extractelement <2 x i16> %coords_lo, i32 0
131 %y = extractelement <2 x i16> %coords_lo, i32 1
132 %z = extractelement <2 x i16> %coords_hi, i32 0
133 %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 7, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
; load.v4f32.3d -- 3D a16 image load with all four channels enabled
; (dmask = 15). %x and %y are elements 0 and 1 of %coords_lo; %z is element 0
; of %coords_hi. The patterns expect a four-register result (v[0:3]) and a
; two-register address, written v[0:1] in the GFX9 and GFX10 patterns and
; [v0, v1] in the GFX12 pattern, plus dmask:0xf and the a16 modifier. The GFX9
; pattern has no dim operand and the GFX12 pattern has no unorm.
137 ; GCN-LABEL: {{^}}load.v4f32.3d:
138 ; GFX9: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
139 ; GFX10: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
140 ; GFX12: image_load v[0:3], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D a16
141 define amdgpu_ps <4 x float> @load.v4f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
143 %x = extractelement <2 x i16> %coords_lo, i32 0
144 %y = extractelement <2 x i16> %coords_lo, i32 1
145 %z = extractelement <2 x i16> %coords_hi, i32 0
146 %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 15, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
; Declarations of the image-load intrinsics used by the tests in this file.
; The 1d, 2d and 3d variants take one, two and three i16 coordinates
; respectively, between a leading i32 operand (the callers pass the dmask
; value there) and the <8 x i32> resource descriptor. The two trailing i32
; operands are always passed as 0 by the visible callers.
; NOTE(review): these declarations reference attribute group #2, but only
; groups #0 and #1 are defined in the visible lines, and neither of those is
; referenced by anything visible. Confirm whether #2 is defined elsewhere or
; whether #1 (nounwind readonly) was intended.
150 declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
151 declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #2
152 declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #2
154 attributes #0 = { nounwind }
155 attributes #1 = { nounwind readonly }