; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -o - %s | FileCheck -check-prefix=UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -o - %s | FileCheck -check-prefix=PACKED %s
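; These tests exercise legalization of the D16 (half-typed) llvm.amdgcn.image.load.2d
; intrinsics. With the UNPACKED prefix (tonga) each half component is legalized to its
; own s32 result and repacked with and/shl/or, while the PACKED prefix (gfx810) keeps
; the results in packed <2 x s16> registers.
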
5 define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
6 ; UNPACKED-LABEL: name: image_load_f16
7 ; UNPACKED: bb.1 (%ir-block.0):
8 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
9 ; UNPACKED-NEXT: {{ $}}
10 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
11 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
12 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
13 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
14 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
15 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
16 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
17 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
18 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
19 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
20 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
21 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
22 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8)
23 ; UNPACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](s16)
24 ; UNPACKED-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
25 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
27 ; PACKED-LABEL: name: image_load_f16
28 ; PACKED: bb.1 (%ir-block.0):
; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; PACKED-NEXT: {{ $}}
31 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
32 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
33 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
34 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
35 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
36 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
37 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
38 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
39 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
40 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
41 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
42 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
43 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8)
44 ; PACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](s16)
45 ; PACKED-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
46 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret half %tex
}
51 define amdgpu_ps <2 x half> @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
52 ; UNPACKED-LABEL: name: image_load_v2f16
53 ; UNPACKED: bb.1 (%ir-block.0):
54 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
55 ; UNPACKED-NEXT: {{ $}}
56 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
57 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
58 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
59 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
60 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
61 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
62 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
63 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
64 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
65 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
66 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
67 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
68 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
69 ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
70 ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
71 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
72 ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
73 ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
74 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
75 ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
76 ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
77 ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
78 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
80 ; PACKED-LABEL: name: image_load_v2f16
81 ; PACKED: bb.1 (%ir-block.0):
; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; PACKED-NEXT: {{ $}}
84 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
85 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
86 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
87 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
88 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
89 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
90 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
91 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
92 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
93 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
94 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
95 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
96 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
97 ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>)
98 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x half> %tex
}
103 define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
104 ; UNPACKED-LABEL: name: image_load_v3f16
105 ; UNPACKED: bb.1 (%ir-block.0):
106 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
107 ; UNPACKED-NEXT: {{ $}}
108 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
109 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
110 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
111 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
112 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
113 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
114 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
115 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
116 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
117 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
118 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
119 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
120 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8)
121 ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>)
122 ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
123 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
124 ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
125 ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
126 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
127 ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
128 ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
129 ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
130 ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
131 ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
132 ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
133 ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
134 ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
135 ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
136 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
138 ; PACKED-LABEL: name: image_load_v3f16
139 ; PACKED: bb.1 (%ir-block.0):
140 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
141 ; PACKED-NEXT: {{ $}}
142 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
143 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
144 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
145 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
146 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
147 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
148 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
149 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
150 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
151 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
152 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
153 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
154 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8)
155 ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s16>)
156 ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
157 ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
158 ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
159 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
160 ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
161 ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
162 ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
163 ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
164 ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
165 ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
166 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x half> %tex
}
171 define amdgpu_ps <4 x half> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
172 ; UNPACKED-LABEL: name: image_load_v4f16
173 ; UNPACKED: bb.1 (%ir-block.0):
174 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
175 ; UNPACKED-NEXT: {{ $}}
176 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
177 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
178 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
179 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
180 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
181 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
182 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
183 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
184 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
185 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
186 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
187 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
188 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>), addrspace 8)
189 ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s32>)
190 ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
191 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
192 ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
193 ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
194 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
195 ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
196 ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
197 ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
198 ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]]
199 ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
200 ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
201 ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
202 ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
203 ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
204 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
206 ; PACKED-LABEL: name: image_load_v4f16
207 ; PACKED: bb.1 (%ir-block.0):
208 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
209 ; PACKED-NEXT: {{ $}}
210 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
211 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
212 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
213 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
214 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
215 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
216 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
217 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
218 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
219 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
220 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
221 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
222 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>), addrspace 8)
223 ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s16>)
224 ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
225 ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
226 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x half> %tex
}
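
; The *_tfe variants set the TFE bit in the texfailctrl operand, so the legalized
; load returns one extra status dword, which these tests store to an undef
; addrspace(1) pointer.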
231 define amdgpu_ps half @image_load_tfe_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
232 ; UNPACKED-LABEL: name: image_load_tfe_f16
233 ; UNPACKED: bb.1 (%ir-block.0):
234 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
235 ; UNPACKED-NEXT: {{ $}}
236 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
237 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
238 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
239 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
240 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
241 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
242 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
243 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
244 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
245 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
246 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
247 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
248 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
249 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
250 ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
251 ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
252 ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](s32)
253 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
255 ; PACKED-LABEL: name: image_load_tfe_f16
256 ; PACKED: bb.1 (%ir-block.0):
257 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
258 ; PACKED-NEXT: {{ $}}
259 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
260 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
261 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
262 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
263 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
264 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
265 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
266 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
267 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
268 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
269 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
270 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
271 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
272 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
273 ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
274 ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
275 ; PACKED-NEXT: $vgpr0 = COPY [[UV]](s32)
276 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
277 %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
278 %tex = extractvalue { half, i32 } %res, 0
279 %tfe = extractvalue { half, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret half %tex
}
284 define amdgpu_ps <2 x half> @image_load_tfe_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
285 ; UNPACKED-LABEL: name: image_load_tfe_v2f16
286 ; UNPACKED: bb.1 (%ir-block.0):
287 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
288 ; UNPACKED-NEXT: {{ $}}
289 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
290 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
291 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
292 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
293 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
294 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
295 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
296 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
297 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
298 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
299 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
300 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
301 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
302 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
303 ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>)
304 ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
305 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
306 ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
307 ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
308 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
309 ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
310 ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
311 ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
312 ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
313 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
315 ; PACKED-LABEL: name: image_load_tfe_v2f16
316 ; PACKED: bb.1 (%ir-block.0):
317 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
318 ; PACKED-NEXT: {{ $}}
319 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
320 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
321 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
322 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
323 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
324 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
325 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
326 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
327 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
328 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
329 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
330 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
331 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
332 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
333 ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
334 ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
335 ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
336 ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
337 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
338 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
339 %tex = extractvalue { <2 x half>, i32 } %res, 0
340 %tfe = extractvalue { <2 x half>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <2 x half> %tex
}
345 define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
346 ; UNPACKED-LABEL: name: image_load_tfe_v3f16
347 ; UNPACKED: bb.1 (%ir-block.0):
348 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
349 ; UNPACKED-NEXT: {{ $}}
350 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
351 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
352 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
353 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
354 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
355 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
356 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
357 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
358 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
359 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
360 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
361 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
362 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
363 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8)
364 ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s32>)
365 ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
366 ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
367 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
368 ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
369 ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
370 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
371 ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
372 ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
373 ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
374 ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
375 ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
376 ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
377 ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
378 ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
379 ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
380 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
382 ; PACKED-LABEL: name: image_load_tfe_v3f16
383 ; PACKED: bb.1 (%ir-block.0):
384 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
385 ; PACKED-NEXT: {{ $}}
386 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
387 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
388 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
389 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
390 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
391 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
392 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
393 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
394 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
395 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
396 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
397 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
398 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
399 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8)
400 ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>)
401 ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
402 ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
403 ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
404 ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
405 ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
406 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
407 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
408 %tex = extractvalue { <3 x half>, i32 } %res, 0
409 %tfe = extractvalue { <3 x half>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <3 x half> %tex
}
414 define amdgpu_ps <4 x half> @image_load_tfe_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
415 ; UNPACKED-LABEL: name: image_load_tfe_v4f16
416 ; UNPACKED: bb.1 (%ir-block.0):
417 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
418 ; UNPACKED-NEXT: {{ $}}
419 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
420 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
421 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
422 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
423 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
424 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
425 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
426 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
427 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
428 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
429 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
430 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
431 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
432 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>), addrspace 8)
433 ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<5 x s32>)
434 ; UNPACKED-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
435 ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
436 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
437 ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
438 ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
439 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
440 ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
441 ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
442 ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
443 ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]]
444 ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
445 ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
446 ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
447 ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
448 ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
449 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
451 ; PACKED-LABEL: name: image_load_tfe_v4f16
452 ; PACKED: bb.1 (%ir-block.0):
453 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
454 ; PACKED-NEXT: {{ $}}
455 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
456 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
457 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
458 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
459 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
460 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
461 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
462 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
463 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
464 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
465 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
466 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
467 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
468 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>), addrspace 8)
469 ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>)
470 ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
471 ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
472 ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
473 ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
474 ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
475 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
476 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
477 %tex = extractvalue { <4 x half>, i32 } %res, 0
478 %tfe = extractvalue { <4 x half>, i32 } %res, 1
  store i32 %tfe, ptr addrspace(1) undef
  ret <4 x half> %tex
}
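
; The *_dmask_* variants use partial or zero dmasks; missing components come from
; implicit defs or zero constants, and a dmask of 0 elides the load entirely.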
483 define amdgpu_ps half @image_load_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
484 ; UNPACKED-LABEL: name: image_load_f16_dmask_0000
485 ; UNPACKED: bb.1 (%ir-block.0):
486 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
487 ; UNPACKED-NEXT: {{ $}}
488 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
489 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
490 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
491 ; UNPACKED-NEXT: $vgpr0 = COPY [[DEF]](s32)
492 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
494 ; PACKED-LABEL: name: image_load_f16_dmask_0000
495 ; PACKED: bb.1 (%ir-block.0):
496 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
497 ; PACKED-NEXT: {{ $}}
498 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
499 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
500 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
501 ; PACKED-NEXT: $vgpr0 = COPY [[DEF]](s32)
502 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret half %tex
}
507 define amdgpu_ps <2 x half> @image_load_v2f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
508 ; UNPACKED-LABEL: name: image_load_v2f16_dmask_1000
509 ; UNPACKED: bb.1 (%ir-block.0):
510 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
511 ; UNPACKED-NEXT: {{ $}}
512 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
513 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
514 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
515 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
516 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
517 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
518 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
519 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
520 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
521 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
522 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
523 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
524 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8)
525 ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
526 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]], [[C]]
527 ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
528 ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
529 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
530 ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
531 ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
532 ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
533 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
535 ; PACKED-LABEL: name: image_load_v2f16_dmask_1000
536 ; PACKED: bb.1 (%ir-block.0):
537 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
538 ; PACKED-NEXT: {{ $}}
539 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
540 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
541 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
542 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
543 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
544 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
545 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
546 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
547 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
548 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
549 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
550 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
551 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8)
552 ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>)
553 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x half> %tex
}
558 define amdgpu_ps <2 x half> @image_load_v2f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
559 ; UNPACKED-LABEL: name: image_load_v2f16_dmask_0000
560 ; UNPACKED: bb.1 (%ir-block.0):
561 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
562 ; UNPACKED-NEXT: {{ $}}
563 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
564 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
565 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
566 ; UNPACKED-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>)
567 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
569 ; PACKED-LABEL: name: image_load_v2f16_dmask_0000
570 ; PACKED: bb.1 (%ir-block.0):
571 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
572 ; PACKED-NEXT: {{ $}}
573 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
574 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
575 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
576 ; PACKED-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>)
577 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x half> %tex
}
582 define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
583 ; UNPACKED-LABEL: name: image_load_v3f16_dmask_1100
584 ; UNPACKED: bb.1 (%ir-block.0):
585 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
586 ; UNPACKED-NEXT: {{ $}}
587 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
588 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
589 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
590 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
591 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
592 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
593 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
594 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
595 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
596 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
597 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
598 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
599 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
600 ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
601 ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
602 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
603 ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
604 ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
605 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
606 ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
607 ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
608 ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
609 ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
610 ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
611 ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
612 ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
613 ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
614 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
616 ; PACKED-LABEL: name: image_load_v3f16_dmask_1100
617 ; PACKED: bb.1 (%ir-block.0):
618 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
619 ; PACKED-NEXT: {{ $}}
620 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
621 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
622 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
623 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
624 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
625 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
626 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
627 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
628 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
629 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
630 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
631 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
632 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
633 ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
634 ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
635 ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
636 ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
637 ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
638 ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>)
639 ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST]](<2 x s16>)
640 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x half> %tex
}
645 define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
646 ; UNPACKED-LABEL: name: image_load_v3f16_dmask_1000
647 ; UNPACKED: bb.1 (%ir-block.0):
648 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
649 ; UNPACKED-NEXT: {{ $}}
650 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
651 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
652 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
653 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
654 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
655 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
656 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
657 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
658 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
659 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
660 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
661 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
662 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8)
663 ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
664 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]], [[C]]
665 ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
666 ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
667 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
668 ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
669 ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
670 ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
671 ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
672 ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
673 ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
674 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
676 ; PACKED-LABEL: name: image_load_v3f16_dmask_1000
677 ; PACKED: bb.1 (%ir-block.0):
678 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
679 ; PACKED-NEXT: {{ $}}
680 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
681 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
682 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
683 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
684 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
685 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
686 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
687 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
688 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
689 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
690 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
691 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
692 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8)
693 ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
694 ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
695 ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
696 ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
697 ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
698 ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>)
699 ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST]](<2 x s16>)
700 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x half> %tex
}
705 define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
706 ; UNPACKED-LABEL: name: image_load_v3f16_dmask_0000
707 ; UNPACKED: bb.1 (%ir-block.0):
708 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
709 ; UNPACKED-NEXT: {{ $}}
710 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
711 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
712 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
713 ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
714 ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
715 ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
716 ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
717 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
718 ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
719 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
720 ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
721 ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
722 ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
723 ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
724 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
726 ; PACKED-LABEL: name: image_load_v3f16_dmask_0000
727 ; PACKED: bb.1 (%ir-block.0):
728 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
729 ; PACKED-NEXT: {{ $}}
730 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
731 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
732 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
733 ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
734 ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
735 ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
736 ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
737 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
738 ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
739 ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
740 ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
741 ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
742 ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
743 ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
744 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x half> %tex
}
749 define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
750 ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1110
751 ; UNPACKED: bb.1 (%ir-block.0):
752 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
753 ; UNPACKED-NEXT: {{ $}}
754 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
755 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
756 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
757 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
758 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
759 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
760 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
761 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
762 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
763 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
764 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
765 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
766 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8)
767 ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>)
768 ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
769 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
770 ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
771 ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
772 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
773 ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
774 ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
775 ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
776 ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
777 ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
778 ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
779 ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
780 ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
781 ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
782 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
784 ; PACKED-LABEL: name: image_load_v4f16_dmask_1110
785 ; PACKED: bb.1 (%ir-block.0):
786 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
787 ; PACKED-NEXT: {{ $}}
788 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
789 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
790 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
791 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
792 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
793 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
794 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
795 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
796 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
797 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
798 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
799 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
800 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8)
801 ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s16>)
802 ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
803 ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
804 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
805 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
806 ret <4 x half> %tex
807 }
809 define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
810 ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1100
811 ; UNPACKED: bb.1 (%ir-block.0):
812 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
813 ; UNPACKED-NEXT: {{ $}}
814 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
815 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
816 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
817 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
818 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
819 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
820 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
821 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
822 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
823 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
824 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
825 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
826 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
827 ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
828 ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
829 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
830 ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
831 ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
832 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
833 ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
834 ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
835 ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
836 ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
837 ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
838 ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
839 ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
840 ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
841 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
843 ; PACKED-LABEL: name: image_load_v4f16_dmask_1100
844 ; PACKED: bb.1 (%ir-block.0):
845 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
846 ; PACKED-NEXT: {{ $}}
847 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
848 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
849 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
850 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
851 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
852 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
853 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
854 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
855 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
856 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
857 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
858 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
859 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
860 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
861 ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>)
862 ; PACKED-NEXT: $vgpr1 = COPY [[DEF]](<2 x s16>)
863 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
864 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
865 ret <4 x half> %tex
866 }
868 define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
869 ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1000
870 ; UNPACKED: bb.1 (%ir-block.0):
871 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
872 ; UNPACKED-NEXT: {{ $}}
873 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
874 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
875 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
876 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
877 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
878 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
879 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
880 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
881 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
882 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
883 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
884 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
885 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8)
886 ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
887 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]], [[C]]
888 ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
889 ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
890 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
891 ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
892 ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
893 ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
894 ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
895 ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
896 ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
897 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
899 ; PACKED-LABEL: name: image_load_v4f16_dmask_1000
900 ; PACKED: bb.1 (%ir-block.0):
901 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
902 ; PACKED-NEXT: {{ $}}
903 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
904 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
905 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
906 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
907 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
908 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
909 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
910 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
911 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
912 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
913 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
914 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
915 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 8)
916 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
917 ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>)
918 ; PACKED-NEXT: $vgpr1 = COPY [[DEF]](<2 x s16>)
919 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
920 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
921 ret <4 x half> %tex
922 }
924 define amdgpu_ps <4 x half> @image_load_v4f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
925 ; UNPACKED-LABEL: name: image_load_v4f16_dmask_0000
926 ; UNPACKED: bb.1 (%ir-block.0):
927 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
928 ; UNPACKED-NEXT: {{ $}}
929 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
930 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
931 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
932 ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
933 ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
934 ; UNPACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
935 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
937 ; PACKED-LABEL: name: image_load_v4f16_dmask_0000
938 ; PACKED: bb.1 (%ir-block.0):
939 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
940 ; PACKED-NEXT: {{ $}}
941 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
942 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
943 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
944 ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
945 ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
946 ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
947 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
948 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
949 ret <4 x half> %tex
950 }
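; The remaining tests request the TFE status word, which appends one extra s32 to the load result; it is split off with G_UNMERGE_VALUES and stored through a global pointer so the status write is not dead. A dmask of 0 is still widened to a one-channel load when TFE is set.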
952 define amdgpu_ps half @image_load_tfe_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
953 ; UNPACKED-LABEL: name: image_load_tfe_f16_dmask_0000
954 ; UNPACKED: bb.1 (%ir-block.0):
955 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
956 ; UNPACKED-NEXT: {{ $}}
957 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
958 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
959 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
960 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
961 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
962 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
963 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
964 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
965 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
966 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
967 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
968 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
969 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
970 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
971 ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
972 ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
973 ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](s32)
974 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
976 ; PACKED-LABEL: name: image_load_tfe_f16_dmask_0000
977 ; PACKED: bb.1 (%ir-block.0):
978 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
979 ; PACKED-NEXT: {{ $}}
980 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
981 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
982 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
983 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
984 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
985 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
986 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
987 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
988 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
989 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
990 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
991 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
992 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
993 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
994 ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
995 ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
996 ; PACKED-NEXT: $vgpr0 = COPY [[UV]](s32)
997 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
998 %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
999 %tex = extractvalue { half, i32 } %res, 0
1000 %tfe = extractvalue { half, i32 } %res, 1
1001 store i32 %tfe, ptr addrspace(1) undef
1002 ret half %tex
1003 }
1005 define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1006 ; UNPACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000
1007 ; UNPACKED: bb.1 (%ir-block.0):
1008 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1009 ; UNPACKED-NEXT: {{ $}}
1010 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1011 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1012 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1013 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1014 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1015 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1016 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1017 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1018 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1019 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1020 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1021 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1022 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1023 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
1024 ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
1025 ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1026 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
1027 ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1028 ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1029 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1030 ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1031 ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1032 ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1033 ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1034 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1036 ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000
1037 ; PACKED: bb.1 (%ir-block.0):
1038 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1039 ; PACKED-NEXT: {{ $}}
1040 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1041 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1042 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1043 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1044 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1045 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1046 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1047 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1048 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1049 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1050 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1051 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1052 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1053 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
1054 ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
1055 ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1056 ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1057 ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1058 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1059 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1060 %tex = extractvalue { <2 x half>, i32 } %res, 0
1061 %tfe = extractvalue { <2 x half>, i32 } %res, 1
1062 store i32 %tfe, ptr addrspace(1) undef
1063 ret <2 x half> %tex
1064 }
1066 define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1067 ; UNPACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000
1068 ; UNPACKED: bb.1 (%ir-block.0):
1069 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1070 ; UNPACKED-NEXT: {{ $}}
1071 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1072 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1073 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1074 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1075 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1076 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1077 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1078 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1079 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1080 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1081 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1082 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1083 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1084 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
1085 ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
1086 ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1087 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
1088 ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1089 ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1090 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1091 ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1092 ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1093 ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1094 ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1095 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1097 ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000
1098 ; PACKED: bb.1 (%ir-block.0):
1099 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1100 ; PACKED-NEXT: {{ $}}
1101 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1102 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1103 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1104 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1105 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1106 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1107 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1108 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1109 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1110 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1111 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1112 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1113 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1114 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
1115 ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
1116 ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1117 ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1118 ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1119 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
1120 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1121 %tex = extractvalue { <2 x half>, i32 } %res, 0
1122 %tfe = extractvalue { <2 x half>, i32 } %res, 1
1123 store i32 %tfe, ptr addrspace(1) undef
1124 ret <2 x half> %tex
1125 }
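; <3 x half> + TFE with a partial dmask: the PACKED run only receives the low <2 x s16> pair plus the status dword, so the high half of the return value is rebuilt from a zero constant before being copied to $vgpr1.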
1127 define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1128 ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100
1129 ; UNPACKED: bb.1 (%ir-block.0):
1130 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1131 ; UNPACKED-NEXT: {{ $}}
1132 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1133 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1134 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1135 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1136 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1137 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1138 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1139 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1140 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1141 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1142 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1143 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1144 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1145 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
1146 ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>)
1147 ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1148 ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1149 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
1150 ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
1151 ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1152 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
1153 ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1154 ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1155 ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1156 ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
1157 ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
1158 ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1159 ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1160 ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1161 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1163 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100
1164 ; PACKED: bb.1 (%ir-block.0):
1165 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1166 ; PACKED-NEXT: {{ $}}
1167 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1168 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1169 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1170 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1171 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1172 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1173 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1174 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1175 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1176 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1177 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1178 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1179 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1180 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
1181 ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
1182 ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1183 ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1184 ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1185 ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1186 ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
1187 ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
1188 ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1189 ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1190 ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1191 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1192 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1193 %tex = extractvalue { <3 x half>, i32 } %res, 0
1194 %tfe = extractvalue { <3 x half>, i32 } %res, 1
1195 store i32 %tfe, ptr addrspace(1) undef
1196 ret <3 x half> %tex
1197 }
1199 define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1200 ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000
1201 ; UNPACKED: bb.1 (%ir-block.0):
1202 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1203 ; UNPACKED-NEXT: {{ $}}
1204 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1205 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1206 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1207 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1208 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1209 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1210 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1211 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1212 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1213 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1214 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1215 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1216 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1217 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
1218 ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
1219 ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1220 ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1221 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
1222 ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1223 ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1224 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1225 ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1226 ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1227 ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
1228 ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1229 ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1230 ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1231 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1233 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000
1234 ; PACKED: bb.1 (%ir-block.0):
1235 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1236 ; PACKED-NEXT: {{ $}}
1237 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1238 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1239 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1240 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1241 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1242 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1243 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1244 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1245 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1246 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1247 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1248 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1249 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1250 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
1251 ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
1252 ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1253 ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1254 ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1255 ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1256 ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
1257 ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
1258 ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1259 ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1260 ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1261 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1262 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1263 %tex = extractvalue { <3 x half>, i32 } %res, 0
1264 %tfe = extractvalue { <3 x half>, i32 } %res, 1
1265 store i32 %tfe, ptr addrspace(1) undef
1266 ret <3 x half> %tex
1267 }
1269 define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1270 ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000
1271 ; UNPACKED: bb.1 (%ir-block.0):
1272 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1273 ; UNPACKED-NEXT: {{ $}}
1274 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1275 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1276 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1277 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1278 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1279 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1280 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1281 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1282 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1283 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1284 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1285 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1286 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1287 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
1288 ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
1289 ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1290 ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1291 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
1292 ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1293 ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1294 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1295 ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1296 ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1297 ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
1298 ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1299 ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1300 ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1301 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1303 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000
1304 ; PACKED: bb.1 (%ir-block.0):
1305 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1306 ; PACKED-NEXT: {{ $}}
1307 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1308 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1309 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1310 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1311 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1312 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1313 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1314 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1315 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1316 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1317 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1318 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1319 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1320 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
1321 ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
1322 ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1323 ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1324 ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1325 ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1326 ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
1327 ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
1328 ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1329 ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1330 ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1331 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1332 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1333 %tex = extractvalue { <3 x half>, i32 } %res, 0
1334 %tfe = extractvalue { <3 x half>, i32 } %res, 1
1335 store i32 %tfe, ptr addrspace(1) undef
1336 ret <3 x half> %tex
1337 }
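; <4 x half> + TFE: even with dmask 7 only three data channels are loaded alongside the status dword; the unrequested fourth half is padded (zeroed in the UNPACKED run, left as an implicit def by the PACKED run for the smaller dmasks) rather than loaded.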
1339 define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1340 ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110
1341 ; UNPACKED: bb.1 (%ir-block.0):
1342 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1343 ; UNPACKED-NEXT: {{ $}}
1344 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1345 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1346 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1347 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1348 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1349 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1350 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1351 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1352 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1353 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1354 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1355 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1356 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1357 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8)
1358 ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s32>)
1359 ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1360 ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1361 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
1362 ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
1363 ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1364 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
1365 ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1366 ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1367 ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
1368 ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1369 ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
1370 ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
1371 ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1372 ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1373 ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1374 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1376 ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110
1377 ; PACKED: bb.1 (%ir-block.0):
1378 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1379 ; PACKED-NEXT: {{ $}}
1380 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1381 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1382 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1383 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1384 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1385 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1386 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1387 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1388 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1389 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1390 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1391 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1392 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1393 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 8)
1394 ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>)
1395 ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1396 ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
1397 ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1398 ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1399 ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1400 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1401 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1402 %tex = extractvalue { <4 x half>, i32 } %res, 0
1403 %tfe = extractvalue { <4 x half>, i32 } %res, 1
1404 store i32 %tfe, ptr addrspace(1) undef
1405 ret <4 x half> %tex
1406 }
1408 define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1409 ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100
1410 ; UNPACKED: bb.1 (%ir-block.0):
1411 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1412 ; UNPACKED-NEXT: {{ $}}
1413 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1414 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1415 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1416 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1417 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1418 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1419 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1420 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1421 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1422 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1423 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1424 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1425 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1426 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
1427 ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>)
1428 ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1429 ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1430 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
1431 ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
1432 ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1433 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
1434 ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1435 ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1436 ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1437 ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
1438 ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
1439 ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1440 ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1441 ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1442 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1444 ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100
1445 ; PACKED: bb.1 (%ir-block.0):
1446 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1447 ; PACKED-NEXT: {{ $}}
1448 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1449 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1450 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1451 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1452 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1453 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1454 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1455 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1456 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1457 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1458 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1459 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1460 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1461 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 8)
1462 ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
1463 ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1464 ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1465 ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1466 ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1467 ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>)
1468 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1469 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1470 %tex = extractvalue { <4 x half>, i32 } %res, 0
1471 %tfe = extractvalue { <4 x half>, i32 } %res, 1
1472 store i32 %tfe, ptr addrspace(1) undef
1473 ret <4 x half> %tex
1474 }
1476 define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
1477 ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000
1478 ; UNPACKED: bb.1 (%ir-block.0):
1479 ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1480 ; UNPACKED-NEXT: {{ $}}
1481 ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1482 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1483 ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1484 ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1485 ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1486 ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1487 ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1488 ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1489 ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1490 ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1491 ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1492 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1493 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1494 ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
1495 ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
1496 ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1497 ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
1498 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
1499 ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1500 ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
1501 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
1502 ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
1503 ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
1504 ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
1505 ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
1506 ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1507 ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
1508 ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1510 ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000
1511 ; PACKED: bb.1 (%ir-block.0):
1512 ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
1513 ; PACKED-NEXT: {{ $}}
1514 ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
1515 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
1516 ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
1517 ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
1518 ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
1519 ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
1520 ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
1521 ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
1522 ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
1523 ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
1524 ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
1525 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1526 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
1527 ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
1528 ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
1529 ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
1530 ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
1531 ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1532 ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
1533 ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>)
1534 ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
1535 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
1536 %tex = extractvalue { <4 x half>, i32 } %res, 0
1537 %tfe = extractvalue { <4 x half>, i32 } %res, 1
1538 store i32 %tfe, ptr addrspace(1) undef
1539 ret <4 x half> %tex
1540 }
define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; UNPACKED-NEXT: {{ $}}
; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
;
; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000
; PACKED: bb.1 (%ir-block.0):
; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
; PACKED-NEXT: {{ $}}
; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 8)
; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>)
; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <4 x half>, i32 } %res, 0
%tfe = extractvalue { <4 x half>, i32 } %res, 1
store i32 %tfe, ptr addrspace(1) undef
ret <4 x half> %tex
}

declare half @llvm.amdgcn.image.load.2d.f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }