1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
; Test sample_cd_1d - calls llvm.amdgcn.image.sample.cd.1d with three half
; operands (%dsdh, %dsdv, %s) and a leading i32 15, which appears as dmask:0xf
; in the expected output.
; gfx900 checks expect a single image_sample_cd with the a16 modifier whose
; address operand is v[0:2]; gfx1010 checks expect image_sample_cd_g16 with
; dim:SQ_RSRC_IMG_1D and a16. No v_perm_b32 is expected on either target.
5 define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
6 ; GFX9-LABEL: sample_cd_1d:
7 ; GFX9: ; %bb.0: ; %main_body
8 ; GFX9-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
9 ; GFX9-NEXT: s_waitcnt vmcnt(0)
10 ; GFX9-NEXT: ; return to shader part epilog
12 ; GFX10-LABEL: sample_cd_1d:
13 ; GFX10: ; %bb.0: ; %main_body
14 ; GFX10-NEXT: image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
15 ; GFX10-NEXT: s_waitcnt vmcnt(0)
16 ; GFX10-NEXT: ; return to shader part epilog
18 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test sample_cd_2d - calls llvm.amdgcn.image.sample.cd.2d with six half
; operands (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t).
; gfx900 checks expect the selector 0x5040100 to be loaded into s12 and then
; used by three v_perm_b32 instructions that combine v5/v4, v3/v2 and v1/v0
; into v4, v3 and v2 (presumably packing two halves per dword - TODO confirm
; against the ISA description of v_perm_b32); the sample reads v[2:4].
; gfx1010 checks expect the same three v_perm_b32 instructions with 0x5040100
; as a literal operand, followed by image_sample_cd_g16 with
; dim:SQ_RSRC_IMG_2D.
22 define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
23 ; GFX9-LABEL: sample_cd_2d:
24 ; GFX9: ; %bb.0: ; %main_body
25 ; GFX9-NEXT: s_mov_b32 s12, 0x5040100
26 ; GFX9-NEXT: v_perm_b32 v4, v5, v4, s12
27 ; GFX9-NEXT: v_perm_b32 v3, v3, v2, s12
28 ; GFX9-NEXT: v_perm_b32 v2, v1, v0, s12
29 ; GFX9-NEXT: image_sample_cd v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16
30 ; GFX9-NEXT: s_waitcnt vmcnt(0)
31 ; GFX9-NEXT: ; return to shader part epilog
33 ; GFX10-LABEL: sample_cd_2d:
34 ; GFX10: ; %bb.0: ; %main_body
35 ; GFX10-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
36 ; GFX10-NEXT: v_perm_b32 v3, v3, v2, 0x5040100
37 ; GFX10-NEXT: v_perm_b32 v2, v1, v0, 0x5040100
38 ; GFX10-NEXT: image_sample_cd_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
39 ; GFX10-NEXT: s_waitcnt vmcnt(0)
40 ; GFX10-NEXT: ; return to shader part epilog
42 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test sample_c_cd_1d - calls llvm.amdgcn.image.sample.c.cd.1d with a float
; %zcompare followed by three half operands (%dsdh, %dsdv, %s).
; gfx900 checks expect a single image_sample_c_cd with a16 reading v[0:3];
; gfx1010 checks expect image_sample_c_cd_g16 with dim:SQ_RSRC_IMG_1D and a16.
; No v_perm_b32 is expected on either target.
46 define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
47 ; GFX9-LABEL: sample_c_cd_1d:
48 ; GFX9: ; %bb.0: ; %main_body
49 ; GFX9-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
50 ; GFX9-NEXT: s_waitcnt vmcnt(0)
51 ; GFX9-NEXT: ; return to shader part epilog
53 ; GFX10-LABEL: sample_c_cd_1d:
54 ; GFX10: ; %bb.0: ; %main_body
55 ; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
56 ; GFX10-NEXT: s_waitcnt vmcnt(0)
57 ; GFX10-NEXT: ; return to shader part epilog
59 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test sample_c_cd_2d - calls llvm.amdgcn.image.sample.c.cd.2d with a float
; %zcompare followed by six half operands (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t).
; gfx900 checks expect v3 and v2 to be copied to v7 and v8 before three
; v_perm_b32 instructions (selector 0x5040100 held in s12) write v3, v2 and
; v1, so that the sample reads the contiguous range v[0:3]. The copies
; apparently preserve inputs that the first two v_perm_b32 overwrite.
; gfx1010 checks expect the v_perm_b32 results in v5, v3 and v1 with no
; copies; the sample takes the register list [v0, v1, v3, v5] instead of a
; contiguous range.
63 define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
64 ; GFX9-LABEL: sample_c_cd_2d:
65 ; GFX9: ; %bb.0: ; %main_body
66 ; GFX9-NEXT: v_mov_b32_e32 v7, v3
67 ; GFX9-NEXT: v_mov_b32_e32 v8, v2
68 ; GFX9-NEXT: s_mov_b32 s12, 0x5040100
69 ; GFX9-NEXT: v_perm_b32 v3, v6, v5, s12
70 ; GFX9-NEXT: v_perm_b32 v2, v4, v7, s12
71 ; GFX9-NEXT: v_perm_b32 v1, v8, v1, s12
72 ; GFX9-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
73 ; GFX9-NEXT: s_waitcnt vmcnt(0)
74 ; GFX9-NEXT: ; return to shader part epilog
76 ; GFX10-LABEL: sample_c_cd_2d:
77 ; GFX10: ; %bb.0: ; %main_body
78 ; GFX10-NEXT: v_perm_b32 v5, v6, v5, 0x5040100
79 ; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
80 ; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
81 ; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
82 ; GFX10-NEXT: s_waitcnt vmcnt(0)
83 ; GFX10-NEXT: ; return to shader part epilog
85 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test sample_cd_cl_1d - calls llvm.amdgcn.image.sample.cd.cl.1d with four
; half operands (%dsdh, %dsdv, %s, %clamp).
; Both targets are expected to emit one v_perm_b32 that combines v3 and v2
; into v2 (selector 0x5040100: via s12 on gfx900, as a literal on gfx1010),
; after which the sample reads v[0:2]. The combined pair is presumably
; %s/%clamp, going by argument order - TODO confirm.
; gfx1010 checks expect the _g16 opcode with dim:SQ_RSRC_IMG_1D.
89 define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
90 ; GFX9-LABEL: sample_cd_cl_1d:
91 ; GFX9: ; %bb.0: ; %main_body
92 ; GFX9-NEXT: s_mov_b32 s12, 0x5040100
93 ; GFX9-NEXT: v_perm_b32 v2, v3, v2, s12
94 ; GFX9-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
95 ; GFX9-NEXT: s_waitcnt vmcnt(0)
96 ; GFX9-NEXT: ; return to shader part epilog
98 ; GFX10-LABEL: sample_cd_cl_1d:
99 ; GFX10: ; %bb.0: ; %main_body
100 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
101 ; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
102 ; GFX10-NEXT: s_waitcnt vmcnt(0)
103 ; GFX10-NEXT: ; return to shader part epilog
105 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test sample_cd_cl_2d - calls llvm.amdgcn.image.sample.cd.cl.2d with seven
; half operands (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t, %clamp).
; gfx900 checks expect three v_perm_b32 instructions (selector 0x5040100 in
; s12) writing v5, v4 and v3; the sample then reads v[3:6], where v6 is not
; written by any expected instruction (presumably it still holds %clamp -
; TODO confirm).
; gfx1010 checks expect the v_perm_b32 results in v4, v2 and v0 and the
; register list [v0, v2, v4, v6] as the address operand.
109 define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
110 ; GFX9-LABEL: sample_cd_cl_2d:
111 ; GFX9: ; %bb.0: ; %main_body
112 ; GFX9-NEXT: s_mov_b32 s12, 0x5040100
113 ; GFX9-NEXT: v_perm_b32 v5, v5, v4, s12
114 ; GFX9-NEXT: v_perm_b32 v4, v3, v2, s12
115 ; GFX9-NEXT: v_perm_b32 v3, v1, v0, s12
116 ; GFX9-NEXT: image_sample_cd_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16
117 ; GFX9-NEXT: s_waitcnt vmcnt(0)
118 ; GFX9-NEXT: ; return to shader part epilog
120 ; GFX10-LABEL: sample_cd_cl_2d:
121 ; GFX10: ; %bb.0: ; %main_body
122 ; GFX10-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
123 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
124 ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
125 ; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
126 ; GFX10-NEXT: s_waitcnt vmcnt(0)
127 ; GFX10-NEXT: ; return to shader part epilog
129 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test sample_c_cd_cl_1d - calls llvm.amdgcn.image.sample.c.cd.cl.1d with a
; float %zcompare followed by four half operands (%dsdh, %dsdv, %s, %clamp).
; Both targets are expected to emit one v_perm_b32 that combines v4 and v3
; into v3 (selector 0x5040100: via s12 on gfx900, as a literal on gfx1010),
; after which the sample reads v[0:3].
; gfx1010 checks expect the _g16 opcode with dim:SQ_RSRC_IMG_1D.
133 define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
134 ; GFX9-LABEL: sample_c_cd_cl_1d:
135 ; GFX9: ; %bb.0: ; %main_body
136 ; GFX9-NEXT: s_mov_b32 s12, 0x5040100
137 ; GFX9-NEXT: v_perm_b32 v3, v4, v3, s12
138 ; GFX9-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
139 ; GFX9-NEXT: s_waitcnt vmcnt(0)
140 ; GFX9-NEXT: ; return to shader part epilog
142 ; GFX10-LABEL: sample_c_cd_cl_1d:
143 ; GFX10: ; %bb.0: ; %main_body
144 ; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
145 ; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
146 ; GFX10-NEXT: s_waitcnt vmcnt(0)
147 ; GFX10-NEXT: ; return to shader part epilog
149 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Test sample_c_cd_cl_2d - calls llvm.amdgcn.image.sample.c.cd.cl.2d with a
; float %zcompare followed by seven half operands
; (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t, %clamp).
; gfx900 checks expect v7 to be copied to v11 and v0 to v7, then three
; v_perm_b32 instructions (selector 0x5040100 in s12) writing v10, v9 and v8,
; so that the sample reads the contiguous range v[7:11].
; gfx1010 checks expect no copies: the v_perm_b32 results land in v5, v3 and
; v1 and the sample takes the register list [v0, v1, v3, v5, v7].
153 define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
154 ; GFX9-LABEL: sample_c_cd_cl_2d:
155 ; GFX9: ; %bb.0: ; %main_body
156 ; GFX9-NEXT: s_mov_b32 s12, 0x5040100
157 ; GFX9-NEXT: v_mov_b32_e32 v11, v7
158 ; GFX9-NEXT: v_mov_b32_e32 v7, v0
159 ; GFX9-NEXT: v_perm_b32 v10, v6, v5, s12
160 ; GFX9-NEXT: v_perm_b32 v9, v4, v3, s12
161 ; GFX9-NEXT: v_perm_b32 v8, v2, v1, s12
162 ; GFX9-NEXT: image_sample_c_cd_cl v[0:3], v[7:11], s[0:7], s[8:11] dmask:0xf a16
163 ; GFX9-NEXT: s_waitcnt vmcnt(0)
164 ; GFX9-NEXT: ; return to shader part epilog
166 ; GFX10-LABEL: sample_c_cd_cl_2d:
167 ; GFX10: ; %bb.0: ; %main_body
168 ; GFX10-NEXT: v_perm_b32 v5, v6, v5, 0x5040100
169 ; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
170 ; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
171 ; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
172 ; GFX10-NEXT: s_waitcnt vmcnt(0)
173 ; GFX10-NEXT: ; return to shader part epilog
175 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Declarations of the eight image-sample intrinsics called by the test
; functions in this file, one per function. Each returns <4 x float> and takes
; a leading i32, the per-variant operands, an <8 x i32> and a <4 x i32>
; operand, and a trailing (i1, i32, i32) triple.
; Names ending in .f16.f16 take only half per-variant operands; names ending
; in .f32.f16 take one float followed by halves. All use attribute group #1.
179 declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
180 declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
181 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
182 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
183 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
184 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
185 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
186 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; Attribute groups. Only #1 (nounwind readonly) is referenced by the lines
; visible in this test, on the intrinsic declarations. #0 and #2 are not
; referenced by any visible define or declare - TODO confirm they are unused
; before removing them.
188 attributes #0 = { nounwind }
189 attributes #1 = { nounwind readonly }
190 attributes #2 = { nounwind readnone }