1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
3 ; GCN-LABEL: {{^}}load.f16.1d:
4 ; GCN: image_load v0, v0, s[0:7] dmask:0x1 unorm a16 d16
; 1D load, first intrinsic argument 1.
; Takes element 0 of %coords as the single i16 coordinate and passes it, with
; the inreg <8 x i32> %rsrc, to the 1d v4f16 image load intrinsic. The leading
; i32 1 lines up with the dmask:0x1 in the expected instruction above, whose
; first operand is the single register v0.
5 define amdgpu_ps <4 x half> @load.f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; Only element 0 of %coords is read in the lines shown for this test.
7 %x = extractelement <2 x i16> %coords, i32 0
8 %v = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32 1, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
12 ; GCN-LABEL: {{^}}load.v2f16.1d:
13 ; GCN: image_load v0, v0, s[0:7] dmask:0x3 unorm a16 d16
; 1D load, first intrinsic argument 3.
; Same shape as the dmask-1 1D test, but the leading i32 is 3, matching the
; dmask:0x3 expected above. The expected first operand is still the single
; register v0.
14 define amdgpu_ps <4 x half> @load.v2f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; Only element 0 of %coords is read in the lines shown for this test.
16 %x = extractelement <2 x i16> %coords, i32 0
17 %v = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32 3, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
21 ; GCN-LABEL: {{^}}load.v3f16.1d:
22 ; GCN: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm a16 d16
; 1D load, first intrinsic argument 7.
; The leading i32 7 matches the dmask:0x7 expected above; with this mask the
; expected first operand widens to the register pair v[0:1].
23 define amdgpu_ps <4 x half> @load.v3f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; Only element 0 of %coords is read in the lines shown for this test.
25 %x = extractelement <2 x i16> %coords, i32 0
26 %v = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32 7, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
30 ; GCN-LABEL: {{^}}load.v4f16.1d:
31 ; GCN: image_load v[0:1], v0, s[0:7] dmask:0xf unorm a16 d16
; 1D load, first intrinsic argument 15.
; The leading i32 15 matches the dmask:0xf expected above; the expected first
; operand is the register pair v[0:1].
32 define amdgpu_ps <4 x half> @load.v4f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; Only element 0 of %coords is read in the lines shown for this test.
34 %x = extractelement <2 x i16> %coords, i32 0
35 %v = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32 15, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
39 ; GCN-LABEL: {{^}}load.f16.2d:
40 ; GCN: image_load v0, v0, s[0:7] dmask:0x1 unorm a16 d16
; 2D load, first intrinsic argument 1.
; Splits %coords into its two i16 elements and passes them as the x and y
; arguments of the 2d v4f16 image load intrinsic. The leading i32 1 matches
; the dmask:0x1 expected above. The expected instruction names v0 for both of
; its first two operands, i.e. the two i16 coordinates are expected to occupy
; a single register.
41 define amdgpu_ps <4 x half> @load.f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; x is element 0 and y is element 1 of the same <2 x i16> argument.
43 %x = extractelement <2 x i16> %coords, i32 0
44 %y = extractelement <2 x i16> %coords, i32 1
45 %v = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32 1, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
49 ; GCN-LABEL: {{^}}load.v2f16.2d:
50 ; GCN: image_load v0, v0, s[0:7] dmask:0x3 unorm a16 d16
; 2D load, first intrinsic argument 3.
; Same shape as the dmask-1 2D test, but the leading i32 is 3, matching the
; dmask:0x3 expected above. Both of the expected instruction's first two
; operands are v0.
51 define amdgpu_ps <4 x half> @load.v2f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; x is element 0 and y is element 1 of the same <2 x i16> argument.
53 %x = extractelement <2 x i16> %coords, i32 0
54 %y = extractelement <2 x i16> %coords, i32 1
55 %v = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32 3, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
59 ; GCN-LABEL: {{^}}load.v3f16.2d:
60 ; GCN: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm a16 d16
; 2D load, first intrinsic argument 7.
; The leading i32 7 matches the dmask:0x7 expected above; the expected first
; operand widens to v[0:1] while the second operand stays the single
; register v0.
61 define amdgpu_ps <4 x half> @load.v3f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; x is element 0 and y is element 1 of the same <2 x i16> argument.
63 %x = extractelement <2 x i16> %coords, i32 0
64 %y = extractelement <2 x i16> %coords, i32 1
65 %v = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32 7, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
69 ; GCN-LABEL: {{^}}load.v4f16.2d:
70 ; GCN: image_load v[0:1], v0, s[0:7] dmask:0xf unorm a16 d16
; 2D load, first intrinsic argument 15.
; The leading i32 15 matches the dmask:0xf expected above; the expected first
; operand is v[0:1] and the second operand is the single register v0.
71 define amdgpu_ps <4 x half> @load.v4f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; x is element 0 and y is element 1 of the same <2 x i16> argument.
73 %x = extractelement <2 x i16> %coords, i32 0
74 %y = extractelement <2 x i16> %coords, i32 1
75 %v = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32 15, i16 %x, i16 %y, <8 x i32> %rsrc, i32 0, i32 0)
79 ; GCN-LABEL: {{^}}load.f16.3d:
80 ; GCN: image_load v0, v[0:1], s[0:7] dmask:0x1 unorm a16 d16
; 3D load, first intrinsic argument 1.
; Coordinates arrive in two <2 x i16> arguments: x and y are elements 0 and 1
; of %coords_lo, and z is element 0 of %coords_hi. They are passed as three
; i16 arguments to the 3d v4f16 image load intrinsic. The leading i32 1
; matches the dmask:0x1 expected above; the expected instruction's first
; operand is v0 and its second operand is the register pair v[0:1].
81 define amdgpu_ps <4 x half> @load.f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
83 %x = extractelement <2 x i16> %coords_lo, i32 0
84 %y = extractelement <2 x i16> %coords_lo, i32 1
; Element 1 of %coords_hi is not read in the lines shown for this test.
85 %z = extractelement <2 x i16> %coords_hi, i32 0
86 %v = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32 1, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
90 ; GCN-LABEL: {{^}}load.v2f16.3d:
91 ; GCN: image_load v0, v[0:1], s[0:7] dmask:0x3 unorm a16 d16
; 3D load, first intrinsic argument 3.
; Same shape as the dmask-1 3D test, but the leading i32 is 3, matching the
; dmask:0x3 expected above. The expected first operand is v0 and the second
; operand is the register pair v[0:1].
92 define amdgpu_ps <4 x half> @load.v2f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
94 %x = extractelement <2 x i16> %coords_lo, i32 0
95 %y = extractelement <2 x i16> %coords_lo, i32 1
; Element 1 of %coords_hi is not read in the lines shown for this test.
96 %z = extractelement <2 x i16> %coords_hi, i32 0
97 %v = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32 3, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
101 ; GCN-LABEL: {{^}}load.v3f16.3d:
102 ; GCN: image_load v[0:1], v[0:1], s[0:7] dmask:0x7 unorm a16 d16
; 3D load, first intrinsic argument 7.
; The leading i32 7 matches the dmask:0x7 expected above; both the first and
; the second operand of the expected instruction are the register pair
; v[0:1].
103 define amdgpu_ps <4 x half> @load.v3f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
105 %x = extractelement <2 x i16> %coords_lo, i32 0
106 %y = extractelement <2 x i16> %coords_lo, i32 1
; Element 1 of %coords_hi is not read in the lines shown for this test.
107 %z = extractelement <2 x i16> %coords_hi, i32 0
108 %v = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32 7, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
112 ; GCN-LABEL: {{^}}load.v4f16.3d:
113 ; GCN: image_load v[0:1], v[0:1], s[0:7] dmask:0xf unorm a16 d16
; 3D load, first intrinsic argument 15.
; The leading i32 15 matches the dmask:0xf expected above; both the first and
; the second operand of the expected instruction are the register pair
; v[0:1].
114 define amdgpu_ps <4 x half> @load.v4f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
116 %x = extractelement <2 x i16> %coords_lo, i32 0
117 %y = extractelement <2 x i16> %coords_lo, i32 1
; Element 1 of %coords_hi is not read in the lines shown for this test.
118 %z = extractelement <2 x i16> %coords_hi, i32 0
119 %v = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32 15, i16 %x, i16 %y, i16 %z, <8 x i32> %rsrc, i32 0, i32 0)
; Declarations of the image load intrinsics called by the tests in this file.
; Each takes a leading i32, one i16 per coordinate (one for 1d, two for 2d,
; three for 3d), an <8 x i32> resource, and two trailing i32 operands, and
; returns <4 x half>.
; The declarations use attribute group #1 (nounwind readonly), which is
; defined below.
; NOTE(review): no attribute group #2 is defined in the visible part of this
; file, so the declarations must not reference it.
123 declare <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32, i16, <8 x i32>, i32, i32) #1
124 declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
125 declare <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
; Attribute groups. #0 is not referenced by any line visible in this file.
127 attributes #0 = { nounwind }
128 attributes #1 = { nounwind readonly }