1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX11 %s
5 define amdgpu_ps <4 x float> @sample_d_1d_g16_a16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
6 ; GFX10-LABEL: name: sample_d_1d_g16_a16
7 ; GFX10: bb.1.main_body:
8 ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
10 ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
11 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
12 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
13 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
14 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
15 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
16 ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
17 ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
18 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
19 ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
20 ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
21 ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
22 ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
23 ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
24 ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
25 ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
26 ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
27 ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
28 ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
29 ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
30 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
31 ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
32 ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16)
33 ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
34 ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
35 ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
36 ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
37 ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
38 ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32)
39 ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32)
40 ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
41 ; GFX11-LABEL: name: sample_d_1d_g16_a16
42 ; GFX11: bb.1.main_body:
43 ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
45 ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
46 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
47 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
48 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
49 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
50 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
51 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
52 ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
53 ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
54 ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
55 ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
56 ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
57 ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
58 ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
59 ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
60 ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
61 ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
62 ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
63 ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
64 ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
65 ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
66 ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
67 ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16)
68 ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
69 ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
70 ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
71 ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32)
72 ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32)
73 ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32)
74 ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32)
75 ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
77 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
81 define amdgpu_ps <4 x float> @sample_d_2d_g16_a16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
82 ; GFX10-LABEL: name: sample_d_2d_g16_a16
83 ; GFX10: bb.1.main_body:
84 ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
86 ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
87 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
88 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
89 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
90 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
91 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
92 ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
93 ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
94 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
95 ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
96 ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
97 ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
98 ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
99 ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
100 ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
101 ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
102 ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
103 ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
104 ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
105 ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
106 ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
107 ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
108 ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
109 ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32)
110 ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
111 ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32)
112 ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
113 ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
114 ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
115 ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
116 ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
117 ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
118 ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
119 ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32)
120 ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32)
121 ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
122 ; GFX11-LABEL: name: sample_d_2d_g16_a16
123 ; GFX11: bb.1.main_body:
124 ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
126 ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
127 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
128 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
129 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
130 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
131 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
132 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
133 ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
134 ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
135 ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
136 ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
137 ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
138 ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
139 ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
140 ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
141 ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
142 ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
143 ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
144 ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
145 ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
146 ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
147 ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
148 ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
149 ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32)
150 ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
151 ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32)
152 ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
153 ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
154 ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
155 ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
156 ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
157 ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32)
158 ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32)
159 ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32)
160 ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32)
161 ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
163 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
167 define amdgpu_ps <4 x float> @sample_d_3d_g16_a16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
168 ; GFX10-LABEL: name: sample_d_3d_g16_a16
169 ; GFX10: bb.1.main_body:
170 ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
172 ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
173 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
174 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
175 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
176 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
177 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
178 ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
179 ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
180 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
181 ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
182 ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
183 ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
184 ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
185 ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
186 ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
187 ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
188 ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
189 ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
190 ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
191 ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
192 ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
193 ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
194 ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
195 ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32)
196 ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
197 ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32)
198 ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
199 ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32)
200 ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
201 ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
202 ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
203 ; GFX10-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32)
204 ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
205 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
206 ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
207 ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16)
208 ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16)
209 ; GFX10-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
210 ; GFX10-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[DEF]](s16)
211 ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>)
212 ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
213 ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
214 ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
215 ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
216 ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32)
217 ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32)
218 ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
219 ; GFX11-LABEL: name: sample_d_3d_g16_a16
220 ; GFX11: bb.1.main_body:
221 ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
223 ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
224 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
225 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
226 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
227 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
228 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
229 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
230 ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
231 ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
232 ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
233 ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
234 ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
235 ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
236 ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
237 ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
238 ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
239 ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
240 ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
241 ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
242 ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
243 ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
244 ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
245 ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
246 ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32)
247 ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
248 ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32)
249 ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
250 ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32)
251 ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
252 ; GFX11-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
253 ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
254 ; GFX11-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32)
255 ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
256 ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
257 ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
258 ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16)
259 ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16)
260 ; GFX11-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
261 ; GFX11-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[DEF]](s16)
262 ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>)
263 ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
264 ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
265 ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32)
266 ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32)
267 ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32)
268 ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32)
269 ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
271 %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; Declarations for the derivative-sample intrinsic variants exercised above.
; Operand order (per the calls in this file): i32 dmask (15 = all four
; channels), then the half-precision gradient operands followed by the
; half-precision coordinate operands (1/2/3 pairs + 1/2/3 coords for
; 1d/2d/3d), the <8 x i32> image resource (%rsrc) and <4 x i32> sampler
; (%samp) descriptors, and three trailing flags passed as 0 here
; (i1 unorm, i32 texfailctrl, i32 cachepolicy per the AMDGPU intrinsic
; convention -- NOTE(review): confirm against AMDGPU intrinsic docs).
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32)
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32)
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32)