; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s
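
; Legalizer checks for @llvm.amdgcn.image.load.* with 16-bit (a16)
; coordinates: the packed <2 x i16> coordinate arguments are unpacked via
; G_BITCAST/G_LSHR/G_TRUNC, repacked into <2 x s16> vectors, and padded
; with G_IMPLICIT_DEF when a coordinate slot is unused.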
define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: load_1d
  ; GFX9: bb.1.main_body:
  ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9-NEXT: {{ $}}
  ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
  ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ;
  ; GFX10NSA-LABEL: name: load_1d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA-NEXT: {{ $}}
  ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
  ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: load_2d
  ; GFX9: bb.1.main_body:
  ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9-NEXT: {{ $}}
  ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
  ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ;
  ; GFX10NSA-LABEL: name: load_2d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA-NEXT: {{ $}}
  ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
  ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %t = extractelement <2 x i16> %coords, i32 1
  %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: load_3d
  ; GFX9: bb.1.main_body:
  ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9-NEXT: {{ $}}
  ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
  ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
  ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
  ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
  ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ;
  ; GFX10NSA-LABEL: name: load_3d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA-NEXT: {{ $}}
  ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
  ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
  ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
  ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
  ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %r = extractelement <2 x i16> %coords_hi, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: load_cube
  ; GFX9: bb.1.main_body:
  ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9-NEXT: {{ $}}
  ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
  ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
  ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
  ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
  ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ;
  ; GFX10NSA-LABEL: name: load_cube
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA-NEXT: {{ $}}
  ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
  ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
  ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
  ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
  ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %slice = extractelement <2 x i16> %coords_hi, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: load_1darray
  ; GFX9: bb.1.main_body:
  ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9-NEXT: {{ $}}
  ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
  ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ;
  ; GFX10NSA-LABEL: name: load_1darray
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA-NEXT: {{ $}}
  ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
  ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %slice = extractelement <2 x i16> %coords, i32 1
  %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32 15, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: load_2darray
  ; GFX9: bb.1.main_body:
  ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9-NEXT: {{ $}}
  ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
  ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
  ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
  ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
  ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ;
  ; GFX10NSA-LABEL: name: load_2darray
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA-NEXT: {{ $}}
  ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
  ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
  ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
  ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
  ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %slice = extractelement <2 x i16> %coords_hi, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: load_2dmsaa
  ; GFX9: bb.1.main_body:
  ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9-NEXT: {{ $}}
  ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
  ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
  ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
  ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
  ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ;
  ; GFX10NSA-LABEL: name: load_2dmsaa
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA-NEXT: {{ $}}
  ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
  ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
  ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
  ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
  ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %fragid = extractelement <2 x i16> %coords_hi, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
  ; GFX9-LABEL: name: load_2darraymsaa
  ; GFX9: bb.1.main_body:
  ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9-NEXT: {{ $}}
  ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
  ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
  ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
  ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
  ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
  ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
  ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ;
  ; GFX10NSA-LABEL: name: load_2darraymsaa
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA-NEXT: {{ $}}
  ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
  ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
  ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
  ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
  ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
  ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
  ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
  ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
  ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
  ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %slice = extractelement <2 x i16> %coords_hi, i32 0
  %fragid = extractelement <2 x i16> %coords_hi, i32 1
  %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
  ; GFX9-LABEL: name: load_mip_1d
  ; GFX9: bb.1.main_body:
  ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX9-NEXT: {{ $}}
  ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
  ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ;
  ; GFX10NSA-LABEL: name: load_mip_1d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
  ; GFX10NSA-NEXT: {{ $}}
  ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
  ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
  ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
  ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
  ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
  ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8)
  ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
  ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
  ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
  ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %mip = extractelement <2 x i16> %coords, i32 1
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 15, i16 %s, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

700 define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
701 ; GFX9-LABEL: name: load_mip_2d
702 ; GFX9: bb.1.main_body:
703 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
705 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
706 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
707 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
708 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
709 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
710 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
711 ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
712 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
713 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
714 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
715 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
716 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
717 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
718 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
719 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
720 ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
721 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
722 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
723 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
724 ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
725 ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
726 ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
727 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
728 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
729 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: load_mip_2d
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %mip = extractelement <2 x i16> %coords_hi, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

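; The remaining mip loads (3d, cube, 1darray, 2darray) follow the same pattern:
; the packed <2 x i16> coordinate arguments are unpacked with bitcast/lshr/trunc,
; repacked into a <4 x s16> vector, and the GFX9 and GFX10NSA check blocks differ
; only in the final immediate operand of G_AMDGPU_INTRIN_IMAGE_LOAD (3 vs 1).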
define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: name: load_mip_3d
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: load_mip_3d
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %r = extractelement <2 x i16> %coords_hi, i32 0
  %mip = extractelement <2 x i16> %coords_hi, i32 1
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %r, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: name: load_mip_cube
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: load_mip_cube
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %slice = extractelement <2 x i16> %coords_hi, i32 0
  %mip = extractelement <2 x i16> %coords_hi, i32 1
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: name: load_mip_1darray
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: load_mip_1darray
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %slice = extractelement <2 x i16> %coords_lo, i32 1
  %mip = extractelement <2 x i16> %coords_hi, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 15, i16 %s, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: name: load_mip_2darray
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: load_mip_2darray
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %slice = extractelement <2 x i16> %coords_hi, i32 0
  %mip = extractelement <2 x i16> %coords_hi, i32 1
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

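; Store tests: the <4 x float> %vdata operand arrives in $vgpr0-$vgpr3 and is
; rebuilt as a <4 x s32> G_BUILD_VECTOR; the packed 16-bit coordinates follow in
; the next VGPRs and are unpacked the same way as in the load tests above.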
define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
; GFX9-LABEL: name: store_1d
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX9-NEXT: S_ENDPGM 0
;
; GFX10NSA-LABEL: name: store_1d
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
; GFX9-LABEL: name: store_2d
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX9-NEXT: S_ENDPGM 0
;
; GFX10NSA-LABEL: name: store_2d
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %t = extractelement <2 x i16> %coords, i32 1
  call void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: name: store_3d
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX9-NEXT: S_ENDPGM 0
;
; GFX10NSA-LABEL: name: store_3d
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %r = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: name: store_cube
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX9-NEXT: S_ENDPGM 0
;
; GFX10NSA-LABEL: name: store_cube
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %slice = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
; GFX9-LABEL: name: store_1darray
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX9-NEXT: S_ENDPGM 0
;
; GFX10NSA-LABEL: name: store_1darray
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %slice = extractelement <2 x i16> %coords, i32 1
  call void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

1503 define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
1504 ; GFX9-LABEL: name: store_2darray
1505 ; GFX9: bb.1.main_body:
1506 ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX9-NEXT: S_ENDPGM 0
;
; GFX10NSA-LABEL: name: store_2darray
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %slice = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: name: store_2dmsaa
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX9-NEXT: S_ENDPGM 0
;
; GFX10NSA-LABEL: name: store_2dmsaa
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %fragid = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
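
; Note: with four i16 coordinates (s, t, slice, fragid) both <2 x s16> halves
; are fully populated, so the legalizer emits no G_IMPLICIT_DEF padding below.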
define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: name: store_2darraymsaa
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX9-NEXT: S_ENDPGM 0
;
; GFX10NSA-LABEL: name: store_2darraymsaa
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %slice = extractelement <2 x i16> %coords_hi, i32 0
  %fragid = extractelement <2 x i16> %coords_hi, i32 1
  call void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
; GFX9-LABEL: name: store_mip_1d
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX9-NEXT: S_ENDPGM 0
;
; GFX10NSA-LABEL: name: store_mip_1d
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %mip = extractelement <2 x i16> %coords, i32 1
  call void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: name: store_mip_2d
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX9-NEXT: S_ENDPGM 0
;
; GFX10NSA-LABEL: name: store_mip_2d
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %mip = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: name: store_mip_3d
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX9-NEXT: S_ENDPGM 0
;
; GFX10NSA-LABEL: name: store_mip_3d
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %r = extractelement <2 x i16> %coords_hi, i32 0
  %mip = extractelement <2 x i16> %coords_hi, i32 1
  call void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %r, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: name: store_mip_cube
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX9-NEXT: S_ENDPGM 0
;
; GFX10NSA-LABEL: name: store_mip_cube
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %slice = extractelement <2 x i16> %coords_hi, i32 0
  %mip = extractelement <2 x i16> %coords_hi, i32 1
  call void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: name: store_mip_1darray
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX9-NEXT: S_ENDPGM 0
;
; GFX10NSA-LABEL: name: store_mip_1darray
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %slice = extractelement <2 x i16> %coords_lo, i32 1
  %mip = extractelement <2 x i16> %coords_hi, i32 0
  call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: name: store_mip_2darray
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX9-NEXT: S_ENDPGM 0
;
; GFX10NSA-LABEL: name: store_mip_2darray
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %slice = extractelement <2 x i16> %coords_hi, i32 0
  %mip = extractelement <2 x i16> %coords_hi, i32 1
  call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}
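
; Note: getresinfo takes only a mip level, and the legalized
; G_AMDGPU_INTRIN_IMAGE_LOAD below carries no memory operand since the
; intrinsic reads nothing but the resource descriptor.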
define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: name: getresinfo_1d
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: getresinfo_1d
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
2321 %mip = extractelement <2 x i16> %coords, i32 0
2322 %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: name: getresinfo_2d
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: getresinfo_2d
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: name: getresinfo_3d
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: getresinfo_3d
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: name: getresinfo_cube
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: getresinfo_cube
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: name: getresinfo_1darray
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: getresinfo_1darray
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: name: getresinfo_2darray
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: getresinfo_2darray
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: name: getresinfo_2dmsaa
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: getresinfo_2dmsaa
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: name: getresinfo_2darraymsaa
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: getresinfo_2darraymsaa
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

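; Partial-dmask loads: dmask 8 returns a single channel (float), dmask 9
; returns two channels (<2 x float>).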
define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: name: load_1d_V1
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (s32), addrspace 8)
; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
;
; GFX10NSA-LABEL: name: load_1d_V1
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (s32), addrspace 8)
; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call float @llvm.amdgcn.image.load.1d.f32.i16(i32 8, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %v
}

define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: name: load_1d_V2
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<2 x s32>), addrspace 8)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
;
; GFX10NSA-LABEL: name: load_1d_V2
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable load (<2 x s32>), addrspace 8)
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32 9, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %v
}

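; Partial-dmask stores: dmask 2 writes a single float, dmask 12 writes <2 x float>.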
define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, <2 x i16> %coords) {
; GFX9-LABEL: name: store_1d_V1
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (s32), addrspace 8)
; GFX9-NEXT: S_ENDPGM 0
;
; GFX10NSA-LABEL: name: store_1d_V1
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (s32), addrspace 8)
; GFX10NSA-NEXT: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.f32.i16(float %vdata, i32 2, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, <2 x i16> %coords) {
; GFX9-LABEL: name: store_1d_V2
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<2 x s32>), addrspace 8)
; GFX9-NEXT: S_ENDPGM 0
;
; GFX10NSA-LABEL: name: store_1d_V2
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 1 :: (dereferenceable store (<2 x s32>), addrspace 8)
; GFX10NSA-NEXT: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float> %vdata, i32 12, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

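; Cache-policy variants: the trailing i32 argument (1 = glc, 2 = slc,
; 3 = glc+slc) is forwarded as the cachepolicy immediate on the
; G_AMDGPU_INTRIN_IMAGE_LOAD.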
define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: name: load_1d_glc
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: load_1d_glc
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 1 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 1)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: name: load_1d_slc
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: load_1d_slc
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 1 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: name: load_1d_glc_slc
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: load_1d_glc_slc
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 1 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 3)
  ret <4 x float> %v
}

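; 1D store with cachepolicy=1 (glc).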
define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
; GFX9-LABEL: name: store_1d_glc
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX9-NEXT: S_ENDPGM 0
;
; GFX10NSA-LABEL: name: store_1d_glc
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 1 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 1)
  ret void
}

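; 1D store with cachepolicy=2 (slc).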
define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
; GFX9-LABEL: name: store_1d_slc
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX9-NEXT: S_ENDPGM 0
;
; GFX10NSA-LABEL: name: store_1d_slc
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 1 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ret void
}

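; 1D store with cachepolicy=3 (glc+slc).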
define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
; GFX9-LABEL: name: store_1d_glc_slc
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX9-NEXT: S_ENDPGM 0
;
; GFX10NSA-LABEL: name: store_1d_glc_slc
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 1 :: (dereferenceable store (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: S_ENDPGM 0
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 3)
  ret void
}

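; getresinfo with dmask=0: the call folds away and only an implicit def of the result remains.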
define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
; GFX9-LABEL: name: getresinfo_dmask0
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: getresinfo_dmask0
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %mip = extractelement <2 x i16> %coords, i32 0
  %r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 0, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %r
}

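; TFE load: the intrinsic returns { <4 x float>, i32 }, so the legalized load produces a fifth dword that is stored out.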
define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: name: load_1d_tfe
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF1]](s16)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: load_1d_tfe
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF1]](s16)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %data = extractvalue { <4 x float>, i32 } %v, 0
  %tfe = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <4 x float> %data
}

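; 2D variant of the TFE load: both 16-bit coordinates come from one <2 x s16> register.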
define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: name: load_2d_tfe
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: load_2d_tfe
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords, i32 0
  %t = extractelement <2 x i16> %coords, i32 1
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %data = extractvalue { <4 x float>, i32 } %v, 0
  %tfe = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <4 x float> %data
}

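; 3D variant of the TFE load: the odd third 16-bit coordinate is padded with an implicit def.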
define amdgpu_ps <4 x float> @load_3d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: name: load_3d_tfe
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF1]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: load_3d_tfe
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF1]](s16)
; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %r = extractelement <2 x i16> %coords_hi, i32 0
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 1, i32 0)
  %data = extractvalue { <4 x float>, i32 } %v, 0
  %tfe = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <4 x float> %data
}

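; 2darraymsaa variant of the TFE load: all four 16-bit coordinates are used, so no padding is needed.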
define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
; GFX9-LABEL: name: load_2darraymsaa_tfe
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: load_2darraymsaa_tfe
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; GFX10NSA-NEXT: {{ $}}
; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 8)
; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
  %s = extractelement <2 x i16> %coords_lo, i32 0
  %t = extractelement <2 x i16> %coords_lo, i32 1
  %slice = extractelement <2 x i16> %coords_hi, i32 0
  %fragid = extractelement <2 x i16> %coords_hi, i32 1
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %data = extractvalue { <4 x float>, i32 } %v, 0
  %tfe = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <4 x float> %data
}

declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
declare float @llvm.amdgcn.image.load.1d.f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare float @llvm.amdgcn.image.load.2d.f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare void @llvm.amdgcn.image.store.1d.f32.i16(float, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float>, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #2
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind writeonly }
attributes #3 = { nounwind readnone }