; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX11 %s
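
; The checks below show how the legalizer lays out the address operands of 3D
; derivative ("d") sample intrinsics: GFX10 packs all of them into a single
; contiguous vector, while GFX11 keeps the first four in separate scalar
; registers and packs only the remainder into a vector (partial NSA).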
define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r) {
  ; GFX10-LABEL: name: sample_d_3d
  ; GFX10: bb.1.main_body:
  ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
  ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
  ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<9 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32)
  ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<9 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX11-LABEL: name: sample_d_3d
  ; GFX11: bb.1.main_body:
  ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
  ; GFX11-NEXT: {{  $}}
  ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
  ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
  ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32)
  ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR2]](<5 x s32>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
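
; With a depth-compare operand there are ten address values: GFX10 packs all
; ten into one <10 x s32> vector, while GFX11 keeps the first four scalar and
; packs the remaining six.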
define amdgpu_ps <4 x float> @sample_c_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r) {
  ; GFX10-LABEL: name: sample_c_d_3d
  ; GFX10: bb.1.main_body:
  ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
  ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
  ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9
  ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<10 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32)
  ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.3d), 15, [[BUILD_VECTOR2]](<10 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX11-LABEL: name: sample_c_d_3d
  ; GFX11: bb.1.main_body:
  ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
  ; GFX11-NEXT: {{  $}}
  ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
  ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
  ; GFX11-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9
  ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32)
  ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.3d), 15, [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR2]](<6 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.3d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
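
; Adding a clamp operand brings the count to eleven: GFX10 builds an
; <11 x s32> vector, while GFX11 packs the trailing seven.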
define amdgpu_ps <4 x float> @sample_c_d_cl_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, float %clamp) {
  ; GFX10-LABEL: name: sample_c_d_cl_3d
  ; GFX10: bb.1.main_body:
  ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
  ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
  ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9
  ; GFX10-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr10
  ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<11 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32)
  ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.3d), 15, [[BUILD_VECTOR2]](<11 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX11-LABEL: name: sample_c_d_cl_3d
  ; GFX11: bb.1.main_body:
  ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
  ; GFX11-NEXT: {{  $}}
  ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
  ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
  ; GFX11-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9
  ; GFX11-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr10
  ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32)
  ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.3d), 15, [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR2]](<7 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.3d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
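
; Adding an offset as well brings the count to twelve: GFX10 builds a
; <12 x s32> vector, while GFX11 packs the trailing eight.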
define amdgpu_ps <4 x float> @sample_c_d_cl_o_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, float %clamp) {
  ; GFX10-LABEL: name: sample_c_d_cl_o_3d
  ; GFX10: bb.1.main_body:
  ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11
  ; GFX10-NEXT: {{  $}}
  ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
  ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
  ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9
  ; GFX10-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr10
  ; GFX10-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr11
  ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<12 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32)
  ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.o.3d), 15, [[BUILD_VECTOR2]](<12 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ; GFX11-LABEL: name: sample_c_d_cl_o_3d
  ; GFX11: bb.1.main_body:
  ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11
  ; GFX11-NEXT: {{  $}}
  ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
  ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
  ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
  ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
  ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
  ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
  ; GFX11-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9
  ; GFX11-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr10
  ; GFX11-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr11
  ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32)
  ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.o.3d), 15, [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR2]](<8 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.3d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
declare <4 x float> @llvm.amdgcn.image.sample.c.d.3d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.3d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.3d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)