1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -S -passes=instcombine -mtriple=amdgcn-amd-amdhsa %s | FileCheck %s
4 ; --------------------------------------------------------------------
5 ; llvm.amdgcn.raw.buffer.load
6 ; --------------------------------------------------------------------
8 define amdgpu_ps float @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
9 ; CHECK-LABEL: @raw_buffer_load_f32(
10 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
11 ; CHECK-NEXT: ret float [[DATA]]
13 %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
17 define amdgpu_ps <1 x float> @raw_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
18 ; CHECK-LABEL: @raw_buffer_load_v1f32(
19 ; CHECK-NEXT: [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
20 ; CHECK-NEXT: ret <1 x float> [[DATA]]
22 %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
26 define amdgpu_ps <2 x float> @raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
27 ; CHECK-LABEL: @raw_buffer_load_v2f32(
28 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
29 ; CHECK-NEXT: ret <2 x float> [[DATA]]
31 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
35 define amdgpu_ps <4 x float> @raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
36 ; CHECK-LABEL: @raw_buffer_load_v4f32(
37 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
38 ; CHECK-NEXT: ret <4 x float> [[DATA]]
40 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
44 define amdgpu_ps float @extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
45 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2f32(
46 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
47 ; CHECK-NEXT: ret float [[DATA]]
49 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
50 %elt0 = extractelement <2 x float> %data, i32 0
54 define amdgpu_ps float @extract_elt1_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
55 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2f32(
56 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
57 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
58 ; CHECK-NEXT: ret float [[DATA]]
60 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
61 %elt1 = extractelement <2 x float> %data, i32 1
65 define amdgpu_ps float @extract_elt0_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
66 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v4f32(
67 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
68 ; CHECK-NEXT: ret float [[DATA]]
70 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
71 %elt0 = extractelement <4 x float> %data, i32 0
75 define amdgpu_ps float @extract_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
76 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4f32(
77 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
78 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
79 ; CHECK-NEXT: ret float [[DATA]]
81 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
82 %elt1 = extractelement <4 x float> %data, i32 1
86 define amdgpu_ps float @extract_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
87 ; CHECK-LABEL: @extract_elt2_raw_buffer_load_v4f32(
88 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
89 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
90 ; CHECK-NEXT: ret float [[DATA]]
92 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
93 %elt1 = extractelement <4 x float> %data, i32 2
97 define amdgpu_ps float @extract_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
98 ; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4f32(
99 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 12
100 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
101 ; CHECK-NEXT: ret float [[DATA]]
103 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
104 %elt1 = extractelement <4 x float> %data, i32 3
108 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
109 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4f32(
110 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
111 ; CHECK-NEXT: ret <2 x float> [[DATA]]
113 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
114 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
115 ret <2 x float> %shuf
118 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
119 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v4f32(
120 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
121 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
122 ; CHECK-NEXT: ret <2 x float> [[DATA]]
124 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
125 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
126 ret <2 x float> %shuf
129 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
130 ; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_v4f32(
131 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
132 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
133 ; CHECK-NEXT: ret <2 x float> [[DATA]]
135 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
136 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
137 ret <2 x float> %shuf
140 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
141 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(
142 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
143 ; CHECK-NEXT: ret <3 x float> [[DATA]]
145 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
146 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
147 ret <3 x float> %shuf
150 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
151 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(
152 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
153 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
154 ; CHECK-NEXT: ret <3 x float> [[DATA]]
156 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
157 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
158 ret <3 x float> %shuf
161 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
162 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(
163 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
164 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
165 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
167 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
168 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
169 ret <3 x float> %shuf
172 define amdgpu_ps float @extract_elt0_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
173 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v3f32(
174 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
175 ; CHECK-NEXT: ret float [[DATA]]
177 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
178 %elt0 = extractelement <3 x float> %data, i32 0
182 define amdgpu_ps float @extract_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
183 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3f32(
184 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
185 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
186 ; CHECK-NEXT: ret float [[DATA]]
188 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
189 %elt1 = extractelement <3 x float> %data, i32 1
193 define amdgpu_ps float @extract_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
194 ; CHECK-LABEL: @extract_elt2_raw_buffer_load_v3f32(
195 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
196 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
197 ; CHECK-NEXT: ret float [[DATA]]
199 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
200 %elt1 = extractelement <3 x float> %data, i32 2
204 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
205 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v3f32(
206 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
207 ; CHECK-NEXT: ret <2 x float> [[DATA]]
209 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
210 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
211 ret <2 x float> %shuf
214 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
215 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v3f32(
216 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
217 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
218 ; CHECK-NEXT: ret <2 x float> [[DATA]]
220 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
221 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
222 ret <2 x float> %shuf
225 define i32 @extract0_bitcast_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
226 ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4f32(
227 ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
228 ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32
229 ; CHECK-NEXT: ret i32 [[VAR2]]
231 %var = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
232 %var1 = bitcast <4 x float> %var to <4 x i32>
233 %var2 = extractelement <4 x i32> %var1, i32 0
237 define float @extract0_bitcast_raw_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
238 ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4i32(
239 ; CHECK-NEXT: [[VAR:%.*]] = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
240 ; CHECK-NEXT: [[VAR2:%.*]] = bitcast i32 [[VAR]] to float
241 ; CHECK-NEXT: ret float [[VAR2]]
243 %var = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
244 %var1 = bitcast <4 x i32> %var to <4 x float>
245 %var2 = extractelement <4 x float> %var1, i32 0
249 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
250 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(
251 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0:![0-9]+]]
252 ; CHECK-NEXT: ret float [[DATA]]
254 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
255 %elt0 = extractelement <2 x float> %data, i32 0
259 declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) #1
260 declare <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32>, i32, i32, i32) #1
261 declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32) #1
262 declare <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32>, i32, i32, i32) #1
263 declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #1
265 declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32) #1
267 define amdgpu_ps half @extract_elt0_raw_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
268 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2f16(
269 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
270 ; CHECK-NEXT: ret half [[DATA]]
272 %data = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
273 %elt0 = extractelement <2 x half> %data, i32 0
277 define amdgpu_ps half @extract_elt1_raw_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
278 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2f16(
279 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
280 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
281 ; CHECK-NEXT: ret half [[DATA]]
283 %data = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
284 %elt1 = extractelement <2 x half> %data, i32 1
288 define amdgpu_ps half @extract_elt1_raw_buffer_load_v3f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
289 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3f16(
290 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
291 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
292 ; CHECK-NEXT: ret half [[DATA]]
294 %data = call <3 x half> @llvm.amdgcn.raw.buffer.load.v3f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
295 %elt0 = extractelement <3 x half> %data, i32 1
299 define amdgpu_ps half @extract_elt1_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
300 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4f16(
301 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
302 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
303 ; CHECK-NEXT: ret half [[DATA]]
305 %data = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
306 %elt1 = extractelement <4 x half> %data, i32 1
310 define amdgpu_ps half @extract_elt3_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
311 ; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4f16(
312 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 6
313 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
314 ; CHECK-NEXT: ret half [[DATA]]
316 %data = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
317 %elt1 = extractelement <4 x half> %data, i32 3
321 define amdgpu_ps <2 x half> @extract_elt0_elt1_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
322 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4f16(
323 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
324 ; CHECK-NEXT: ret <2 x half> [[DATA]]
326 %data = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
327 %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1>
331 declare half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32>, i32, i32, i32) #1
332 declare <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32>, i32, i32, i32) #1
333 declare <3 x half> @llvm.amdgcn.raw.buffer.load.v3f16(<4 x i32>, i32, i32, i32) #1
334 declare <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32>, i32, i32, i32) #1
336 define amdgpu_ps i8 @extract_elt0_raw_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
337 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2i8(
338 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
339 ; CHECK-NEXT: ret i8 [[DATA]]
341 %data = call <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
342 %elt0 = extractelement <2 x i8> %data, i32 0
346 define amdgpu_ps i8 @extract_elt1_raw_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
347 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2i8(
348 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
349 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
350 ; CHECK-NEXT: ret i8 [[DATA]]
352 %data = call <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
353 %elt1 = extractelement <2 x i8> %data, i32 1
357 define amdgpu_ps i8 @extract_elt1_raw_buffer_load_v3i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
358 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3i8(
359 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
360 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
361 ; CHECK-NEXT: ret i8 [[DATA]]
363 %data = call <3 x i8> @llvm.amdgcn.raw.buffer.load.v3i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
364 %elt0 = extractelement <3 x i8> %data, i32 1
368 define amdgpu_ps i8 @extract_elt1_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
369 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4i8(
370 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
371 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
372 ; CHECK-NEXT: ret i8 [[DATA]]
374 %data = call <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
375 %elt1 = extractelement <4 x i8> %data, i32 1
379 define amdgpu_ps i8 @extract_elt3_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
380 ; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4i8(
381 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 3
382 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
383 ; CHECK-NEXT: ret i8 [[DATA]]
385 %data = call <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
386 %elt1 = extractelement <4 x i8> %data, i32 3
390 define amdgpu_ps <2 x i8> @extract_elt0_elt1_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
391 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4i8(
392 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
393 ; CHECK-NEXT: ret <2 x i8> [[DATA]]
395 %data = call <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
396 %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1>
400 declare i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32>, i32, i32, i32) #1
401 declare <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32>, i32, i32, i32) #1
402 declare <3 x i8> @llvm.amdgcn.raw.buffer.load.v3i8(<4 x i32>, i32, i32, i32) #1
403 declare <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32>, i32, i32, i32) #1
405 ; --------------------------------------------------------------------
406 ; llvm.amdgcn.raw.ptr.buffer.load
407 ; --------------------------------------------------------------------
409 define amdgpu_ps float @raw_ptr_buffer_load_f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
410 ; CHECK-LABEL: @raw_ptr_buffer_load_f32(
411 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
412 ; CHECK-NEXT: ret float [[DATA]]
414 %data = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
418 define amdgpu_ps <1 x float> @raw_ptr_buffer_load_v1f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
419 ; CHECK-LABEL: @raw_ptr_buffer_load_v1f32(
420 ; CHECK-NEXT: [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.v1f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
421 ; CHECK-NEXT: ret <1 x float> [[DATA]]
423 %data = call <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.v1f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
424 ret <1 x float> %data
427 define amdgpu_ps <2 x float> @raw_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
428 ; CHECK-LABEL: @raw_ptr_buffer_load_v2f32(
429 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
430 ; CHECK-NEXT: ret <2 x float> [[DATA]]
432 %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
433 ret <2 x float> %data
436 define amdgpu_ps <4 x float> @raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
437 ; CHECK-LABEL: @raw_ptr_buffer_load_v4f32(
438 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
439 ; CHECK-NEXT: ret <4 x float> [[DATA]]
441 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
442 ret <4 x float> %data
445 define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
446 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_v2f32(
447 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
448 ; CHECK-NEXT: ret float [[DATA]]
450 %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
451 %elt0 = extractelement <2 x float> %data, i32 0
455 define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
456 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v2f32(
457 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
458 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
459 ; CHECK-NEXT: ret float [[DATA]]
461 %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
462 %elt1 = extractelement <2 x float> %data, i32 1
466 define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
467 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_v4f32(
468 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
469 ; CHECK-NEXT: ret float [[DATA]]
471 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
472 %elt0 = extractelement <4 x float> %data, i32 0
476 define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
477 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v4f32(
478 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
479 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
480 ; CHECK-NEXT: ret float [[DATA]]
482 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
483 %elt1 = extractelement <4 x float> %data, i32 1
487 define amdgpu_ps float @extract_elt2_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
488 ; CHECK-LABEL: @extract_elt2_raw_ptr_buffer_load_v4f32(
489 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
490 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
491 ; CHECK-NEXT: ret float [[DATA]]
493 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
494 %elt1 = extractelement <4 x float> %data, i32 2
498 define amdgpu_ps float @extract_elt3_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
499 ; CHECK-LABEL: @extract_elt3_raw_ptr_buffer_load_v4f32(
500 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 12
501 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
502 ; CHECK-NEXT: ret float [[DATA]]
504 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
505 %elt1 = extractelement <4 x float> %data, i32 3
509 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
510 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_v4f32(
511 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
512 ; CHECK-NEXT: ret <2 x float> [[DATA]]
514 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
515 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
516 ret <2 x float> %shuf
519 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
520 ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_buffer_load_v4f32(
521 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
522 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
523 ; CHECK-NEXT: ret <2 x float> [[DATA]]
525 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
526 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
527 ret <2 x float> %shuf
530 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
531 ; CHECK-LABEL: @extract_elt2_elt3_raw_ptr_buffer_load_v4f32(
532 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
533 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
534 ; CHECK-NEXT: ret <2 x float> [[DATA]]
536 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
537 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
538 ret <2 x float> %shuf
541 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
542 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_ptr_buffer_load_v4f32(
543 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
544 ; CHECK-NEXT: ret <3 x float> [[DATA]]
546 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
547 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
548 ret <3 x float> %shuf
551 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
552 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_ptr_buffer_load_v4f32(
553 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
554 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
555 ; CHECK-NEXT: ret <3 x float> [[DATA]]
557 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
558 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
559 ret <3 x float> %shuf
562 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
563 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_ptr_buffer_load_v4f32(
564 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
565 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
566 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
568 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
569 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
570 ret <3 x float> %shuf
573 define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
574 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_v3f32(
575 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
576 ; CHECK-NEXT: ret float [[DATA]]
578 %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
579 %elt0 = extractelement <3 x float> %data, i32 0
583 define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
584 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v3f32(
585 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
586 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
587 ; CHECK-NEXT: ret float [[DATA]]
589 %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
590 %elt1 = extractelement <3 x float> %data, i32 1
594 define amdgpu_ps float @extract_elt2_raw_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
595 ; CHECK-LABEL: @extract_elt2_raw_ptr_buffer_load_v3f32(
596 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
597 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
598 ; CHECK-NEXT: ret float [[DATA]]
600 %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
601 %elt1 = extractelement <3 x float> %data, i32 2
605 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
606 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_v3f32(
607 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
608 ; CHECK-NEXT: ret <2 x float> [[DATA]]
610 %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
611 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
612 ret <2 x float> %shuf
615 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
616 ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_buffer_load_v3f32(
617 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
618 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
619 ; CHECK-NEXT: ret <2 x float> [[DATA]]
621 %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
622 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
623 ret <2 x float> %shuf
626 define i32 @extract0_bitcast_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
627 ; CHECK-LABEL: @extract0_bitcast_raw_ptr_buffer_load_v4f32(
628 ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
629 ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32
630 ; CHECK-NEXT: ret i32 [[VAR2]]
632 %var = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
633 %var1 = bitcast <4 x float> %var to <4 x i32>
634 %var2 = extractelement <4 x i32> %var1, i32 0
638 define float @extract0_bitcast_raw_ptr_buffer_load_v4i32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
639 ; CHECK-LABEL: @extract0_bitcast_raw_ptr_buffer_load_v4i32(
640 ; CHECK-NEXT: [[VAR:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
641 ; CHECK-NEXT: [[VAR2:%.*]] = bitcast i32 [[VAR]] to float
642 ; CHECK-NEXT: ret float [[VAR2]]
644 %var = call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
645 %var1 = bitcast <4 x i32> %var to <4 x float>
646 %var2 = extractelement <4 x float> %var1, i32 0
650 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
651 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_ptr_buffer_load_v2f32(
652 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]]
653 ; CHECK-NEXT: ret float [[DATA]]
655 %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
656 %elt0 = extractelement <2 x float> %data, i32 0
660 declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #1
661 declare <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.v1f32(ptr addrspace(8), i32, i32, i32) #1
662 declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32) #1
663 declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8), i32, i32, i32) #1
664 declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) #1
666 declare <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8), i32, i32, i32) #1
668 define amdgpu_ps half @extract_elt0_raw_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
669 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_v2f16(
670 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
671 ; CHECK-NEXT: ret half [[DATA]]
673 %data = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
674 %elt0 = extractelement <2 x half> %data, i32 0
678 define amdgpu_ps half @extract_elt1_raw_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
679 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v2f16(
680 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
681 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
682 ; CHECK-NEXT: ret half [[DATA]]
684 %data = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
685 %elt1 = extractelement <2 x half> %data, i32 1
689 define amdgpu_ps half @extract_elt1_raw_ptr_buffer_load_v3f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
690 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v3f16(
691 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
692 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
693 ; CHECK-NEXT: ret half [[DATA]]
695 %data = call <3 x half> @llvm.amdgcn.raw.ptr.buffer.load.v3f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
696 %elt0 = extractelement <3 x half> %data, i32 1
700 define amdgpu_ps half @extract_elt1_raw_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
701 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v4f16(
702 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
703 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
704 ; CHECK-NEXT: ret half [[DATA]]
706 %data = call <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
707 %elt1 = extractelement <4 x half> %data, i32 1
711 define amdgpu_ps half @extract_elt3_raw_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
712 ; CHECK-LABEL: @extract_elt3_raw_ptr_buffer_load_v4f16(
713 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 6
714 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
715 ; CHECK-NEXT: ret half [[DATA]]
717 %data = call <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
718 %elt1 = extractelement <4 x half> %data, i32 3
722 define amdgpu_ps <2 x half> @extract_elt0_elt1_raw_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
723 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_v4f16(
724 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
725 ; CHECK-NEXT: ret <2 x half> [[DATA]]
727 %data = call <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
728 %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1>
732 declare half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32) #1
733 declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32) #1
734 declare <3 x half> @llvm.amdgcn.raw.ptr.buffer.load.v3f16(ptr addrspace(8), i32, i32, i32) #1
735 declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32) #1
737 define amdgpu_ps i8 @extract_elt0_raw_ptr_buffer_load_v2i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
738 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_v2i8(
739 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
740 ; CHECK-NEXT: ret i8 [[DATA]]
742 %data = call <2 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v2i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
743 %elt0 = extractelement <2 x i8> %data, i32 0
747 define amdgpu_ps i8 @extract_elt1_raw_ptr_buffer_load_v2i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
748 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v2i8(
749 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
750 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
751 ; CHECK-NEXT: ret i8 [[DATA]]
753 %data = call <2 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v2i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
754 %elt1 = extractelement <2 x i8> %data, i32 1
758 define amdgpu_ps i8 @extract_elt1_raw_ptr_buffer_load_v3i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
759 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v3i8(
760 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
761 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
762 ; CHECK-NEXT: ret i8 [[DATA]]
764 %data = call <3 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v3i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
765 %elt0 = extractelement <3 x i8> %data, i32 1
769 define amdgpu_ps i8 @extract_elt1_raw_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
770 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v4i8(
771 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
772 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
773 ; CHECK-NEXT: ret i8 [[DATA]]
775 %data = call <4 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
776 %elt1 = extractelement <4 x i8> %data, i32 1
780 define amdgpu_ps i8 @extract_elt3_raw_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
781 ; CHECK-LABEL: @extract_elt3_raw_ptr_buffer_load_v4i8(
782 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 3
783 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
784 ; CHECK-NEXT: ret i8 [[DATA]]
786 %data = call <4 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
787 %elt1 = extractelement <4 x i8> %data, i32 3
791 define amdgpu_ps <2 x i8> @extract_elt0_elt1_raw_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
792 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_v4i8(
793 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v2i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
794 ; CHECK-NEXT: ret <2 x i8> [[DATA]]
796 %data = call <4 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
797 %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1>
801 declare i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32) #1
802 declare <2 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v2i8(ptr addrspace(8), i32, i32, i32) #1
803 declare <3 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v3i8(ptr addrspace(8), i32, i32, i32) #1
804 declare <4 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v4i8(ptr addrspace(8), i32, i32, i32) #1
806 ; --------------------------------------------------------------------
807 ; llvm.amdgcn.s.buffer.load
808 ; --------------------------------------------------------------------
810 define amdgpu_ps float @s_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
811 ; CHECK-LABEL: @s_buffer_load_f32(
812 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
813 ; CHECK-NEXT: ret float [[DATA]]
815 %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
819 define amdgpu_ps <2 x float> @s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
820 ; CHECK-LABEL: @s_buffer_load_v2f32(
821 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
822 ; CHECK-NEXT: ret <2 x float> [[DATA]]
824 %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
825 ret <2 x float> %data
828 define amdgpu_ps <4 x float> @s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
829 ; CHECK-LABEL: @s_buffer_load_v4f32(
830 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
831 ; CHECK-NEXT: ret <4 x float> [[DATA]]
833 %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
834 ret <4 x float> %data
837 define amdgpu_ps float @extract_elt0_s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
838 ; CHECK-LABEL: @extract_elt0_s_buffer_load_v2f32(
839 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
840 ; CHECK-NEXT: ret float [[DATA]]
842 %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
843 %elt0 = extractelement <2 x float> %data, i32 0
847 define amdgpu_ps float @extract_elt1_s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
848 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v2f32(
849 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
850 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
851 ; CHECK-NEXT: ret float [[DATA]]
853 %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
854 %elt1 = extractelement <2 x float> %data, i32 1
858 define amdgpu_ps float @extract_elt0_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
859 ; CHECK-LABEL: @extract_elt0_s_buffer_load_v4f32(
860 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
861 ; CHECK-NEXT: ret float [[DATA]]
863 %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
864 %elt0 = extractelement <4 x float> %data, i32 0
868 define amdgpu_ps float @extract_elt1_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
869 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v4f32(
870 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
871 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
872 ; CHECK-NEXT: ret float [[DATA]]
874 %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
875 %elt1 = extractelement <4 x float> %data, i32 1
879 define amdgpu_ps float @extract_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
880 ; CHECK-LABEL: @extract_elt2_s_buffer_load_v4f32(
881 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
882 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
883 ; CHECK-NEXT: ret float [[DATA]]
885 %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
886 %elt1 = extractelement <4 x float> %data, i32 2
890 define amdgpu_ps float @extract_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
891 ; CHECK-LABEL: @extract_elt3_s_buffer_load_v4f32(
892 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 12
893 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
894 ; CHECK-NEXT: ret float [[DATA]]
896 %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
897 %elt1 = extractelement <4 x float> %data, i32 3
901 define amdgpu_ps <2 x float> @extract_elt0_elt1_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
902 ; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v4f32(
903 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
904 ; CHECK-NEXT: ret <2 x float> [[DATA]]
906 %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
907 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
908 ret <2 x float> %shuf
911 define amdgpu_ps <2 x float> @extract_elt1_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
912 ; CHECK-LABEL: @extract_elt1_elt2_s_buffer_load_v4f32(
913 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
914 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
915 ; CHECK-NEXT: ret <2 x float> [[DATA]]
917 %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
918 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
919 ret <2 x float> %shuf
922 define amdgpu_ps <2 x float> @extract_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
923 ; CHECK-LABEL: @extract_elt2_elt3_s_buffer_load_v4f32(
924 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
925 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
926 ; CHECK-NEXT: ret <2 x float> [[DATA]]
928 %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
929 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
930 ret <2 x float> %shuf
933 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
934 ; CHECK-LABEL: @extract_elt0_elt1_elt2_s_buffer_load_v4f32(
935 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
936 ; CHECK-NEXT: ret <3 x float> [[DATA]]
938 %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
939 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
940 ret <3 x float> %shuf
943 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
944 ; CHECK-LABEL: @extract_elt0_elt2_elt3_s_buffer_load_v4f32(
945 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
946 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
947 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
949 %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
950 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
951 ret <3 x float> %shuf
954 define amdgpu_ps float @extract_elt0_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
955 ; CHECK-LABEL: @extract_elt0_s_buffer_load_v3f32(
956 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
957 ; CHECK-NEXT: ret float [[DATA]]
959 %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
960 %elt0 = extractelement <3 x float> %data, i32 0
964 define amdgpu_ps float @extract_elt1_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
965 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v3f32(
966 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
967 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
968 ; CHECK-NEXT: ret float [[DATA]]
970 %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
971 %elt1 = extractelement <3 x float> %data, i32 1
975 define amdgpu_ps float @extract_elt2_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
976 ; CHECK-LABEL: @extract_elt2_s_buffer_load_v3f32(
977 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
978 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
979 ; CHECK-NEXT: ret float [[DATA]]
981 %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
982 %elt1 = extractelement <3 x float> %data, i32 2
986 define amdgpu_ps <2 x float> @extract_elt0_elt1_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
987 ; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v3f32(
988 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
989 ; CHECK-NEXT: ret <2 x float> [[DATA]]
991 %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
992 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
993 ret <2 x float> %shuf
996 define amdgpu_ps <2 x float> @extract_elt1_elt2_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
997 ; CHECK-LABEL: @extract_elt1_elt2_s_buffer_load_v3f32(
998 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
999 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1000 ; CHECK-NEXT: ret <2 x float> [[DATA]]
1002 %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
1003 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
1004 ret <2 x float> %shuf
1007 ; Do not trim to vec3 s_buffer_load in instcombine, as the load will most likely be widened
1008 ; to vec4 anyway during lowering.
1009 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1010 ; CHECK-LABEL: @extract_elt1_elt2_elt3_s_buffer_load_v4f32(
1011 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
1012 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
1013 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
1015 %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
1016 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
1017 ret <3 x float> %shuf
1020 define i32 @extract0_bitcast_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1021 ; CHECK-LABEL: @extract0_bitcast_s_buffer_load_v4f32(
1022 ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
1023 ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32
1024 ; CHECK-NEXT: ret i32 [[VAR2]]
1026 %var = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
1027 %var1 = bitcast <4 x float> %var to <4 x i32>
1028 %var2 = extractelement <4 x i32> %var1, i32 0
1032 define float @extract0_bitcast_s_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1033 ; CHECK-LABEL: @extract0_bitcast_s_buffer_load_v4i32(
1034 ; CHECK-NEXT: [[VAR:%.*]] = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
1035 ; CHECK-NEXT: [[VAR2:%.*]] = bitcast i32 [[VAR]] to float
1036 ; CHECK-NEXT: ret float [[VAR2]]
1038 %var = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 0)
1039 %var1 = bitcast <4 x i32> %var to <4 x float>
1040 %var2 = extractelement <4 x float> %var1, i32 0
1044 define amdgpu_ps float @preserve_metadata_extract_elt0_s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1045 ; CHECK-LABEL: @preserve_metadata_extract_elt0_s_buffer_load_v2f32(
1046 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0), !fpmath [[META0]]
1047 ; CHECK-NEXT: ret float [[DATA]]
1049 %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0), !fpmath !0
1050 %elt0 = extractelement <2 x float> %data, i32 0
1054 declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
1055 declare <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32>, i32, i32) #1
1056 declare <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32>, i32, i32) #1
1057 declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32) #1
1058 declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32) #1
1060 define amdgpu_ps half @extract_elt0_s_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1061 ; CHECK-LABEL: @extract_elt0_s_buffer_load_v2f16(
1062 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
1063 ; CHECK-NEXT: ret half [[DATA]]
1065 %data = call <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1066 %elt0 = extractelement <2 x half> %data, i32 0
1070 define amdgpu_ps half @extract_elt1_s_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1071 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v2f16(
1072 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
1073 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1074 ; CHECK-NEXT: ret half [[DATA]]
1076 %data = call <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1077 %elt1 = extractelement <2 x half> %data, i32 1
1081 define amdgpu_ps half @extract_elt1_s_buffer_load_v3f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1082 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v3f16(
1083 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
1084 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1085 ; CHECK-NEXT: ret half [[DATA]]
1087 %data = call <3 x half> @llvm.amdgcn.s.buffer.load.v3f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1088 %elt1 = extractelement <3 x half> %data, i32 1
1092 define amdgpu_ps half @extract_elt1_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1093 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v4f16(
1094 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
1095 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1096 ; CHECK-NEXT: ret half [[DATA]]
1098 %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1099 %elt1 = extractelement <4 x half> %data, i32 1
1104 define amdgpu_ps half @extract_elt3_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1105 ; CHECK-LABEL: @extract_elt3_s_buffer_load_v4f16(
1106 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 6
1107 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1108 ; CHECK-NEXT: ret half [[DATA]]
1110 %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1111 %elt1 = extractelement <4 x half> %data, i32 3
1115 define amdgpu_ps <2 x half> @extract_elt0_elt1_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1116 ; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v4f16(
1117 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
1118 ; CHECK-NEXT: ret <2 x half> [[DATA]]
1120 %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
1121 %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1>
1122 ret <2 x half> %shuf
1125 declare half @llvm.amdgcn.s.buffer.load.f16(<4 x i32>, i32, i32) #1
1126 declare <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32>, i32, i32) #1
1127 declare <3 x half> @llvm.amdgcn.s.buffer.load.v3f16(<4 x i32>, i32, i32) #1
1128 declare <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32>, i32, i32) #1
1130 define amdgpu_ps i8 @extract_elt0_s_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1131 ; CHECK-LABEL: @extract_elt0_s_buffer_load_v2i8(
1132 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
1133 ; CHECK-NEXT: ret i8 [[DATA]]
1135 %data = call <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1136 %elt0 = extractelement <2 x i8> %data, i32 0
1140 define amdgpu_ps i8 @extract_elt1_s_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1141 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v2i8(
1142 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
1143 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1144 ; CHECK-NEXT: ret i8 [[DATA]]
1146 %data = call <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1147 %elt1 = extractelement <2 x i8> %data, i32 1
1151 define amdgpu_ps i8 @extract_elt1_s_buffer_load_v3i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1152 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v3i8(
1153 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
1154 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1155 ; CHECK-NEXT: ret i8 [[DATA]]
1157 %data = call <3 x i8> @llvm.amdgcn.s.buffer.load.v3i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1158 %elt1 = extractelement <3 x i8> %data, i32 1
1162 define amdgpu_ps i8 @extract_elt1_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1163 ; CHECK-LABEL: @extract_elt1_s_buffer_load_v4i8(
1164 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
1165 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1166 ; CHECK-NEXT: ret i8 [[DATA]]
1168 %data = call <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1169 %elt1 = extractelement <4 x i8> %data, i32 1
1173 define amdgpu_ps i8 @extract_elt3_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1174 ; CHECK-LABEL: @extract_elt3_s_buffer_load_v4i8(
1175 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 3
1176 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
1177 ; CHECK-NEXT: ret i8 [[DATA]]
1179 %data = call <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1180 %elt1 = extractelement <4 x i8> %data, i32 3
1184 define amdgpu_ps <2 x i8> @extract_elt0_elt1_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
1185 ; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v4i8(
1186 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0)
1187 ; CHECK-NEXT: ret <2 x i8> [[DATA]]
1189 %data = call <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
1190 %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1>
1194 declare i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32>, i32, i32) #1
1195 declare <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32>, i32, i32) #1
1196 declare <3 x i8> @llvm.amdgcn.s.buffer.load.v3i8(<4 x i32>, i32, i32) #1
1197 declare <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32>, i32, i32) #1
1199 ; --------------------------------------------------------------------
1200 ; llvm.amdgcn.raw.buffer.load.format
1201 ; --------------------------------------------------------------------
1203 define amdgpu_ps float @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1204 ; CHECK-LABEL: @raw_buffer_load_format_f32(
1205 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1206 ; CHECK-NEXT: ret float [[DATA]]
1208 %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1212 define amdgpu_ps <1 x float> @raw_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1213 ; CHECK-LABEL: @raw_buffer_load_format_v1f32(
1214 ; CHECK-NEXT: [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1215 ; CHECK-NEXT: ret <1 x float> [[DATA]]
1217 %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1218 ret <1 x float> %data
1221 define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1222 ; CHECK-LABEL: @raw_buffer_load_format_v2f32(
1223 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1224 ; CHECK-NEXT: ret <2 x float> [[DATA]]
1226 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1227 ret <2 x float> %data
1230 define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1231 ; CHECK-LABEL: @raw_buffer_load_format_v4f32(
1232 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1233 ; CHECK-NEXT: ret <4 x float> [[DATA]]
1235 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1236 ret <4 x float> %data
1239 define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1240 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v2f32(
1241 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1242 ; CHECK-NEXT: ret float [[DATA]]
1244 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1245 %elt0 = extractelement <2 x float> %data, i32 0
1249 define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1250 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v2f32(
1251 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1252 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
1253 ; CHECK-NEXT: ret float [[ELT1]]
1255 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1256 %elt1 = extractelement <2 x float> %data, i32 1
1260 define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1261 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v4f32(
1262 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1263 ; CHECK-NEXT: ret float [[DATA]]
1265 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1266 %elt0 = extractelement <4 x float> %data, i32 0
1270 define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1271 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v4f32(
1272 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1273 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
1274 ; CHECK-NEXT: ret float [[ELT1]]
1276 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1277 %elt1 = extractelement <4 x float> %data, i32 1
1281 define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1282 ; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v4f32(
1283 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1284 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
1285 ; CHECK-NEXT: ret float [[ELT1]]
1287 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1288 %elt1 = extractelement <4 x float> %data, i32 2
1292 define amdgpu_ps float @extract_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1293 ; CHECK-LABEL: @extract_elt3_raw_buffer_load_format_v4f32(
1294 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1295 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
1296 ; CHECK-NEXT: ret float [[ELT1]]
1298 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1299 %elt1 = extractelement <4 x float> %data, i32 3
1303 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1304 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v4f32(
1305 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1306 ; CHECK-NEXT: ret <2 x float> [[DATA]]
1308 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1309 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
1310 ret <2 x float> %shuf
1313 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1314 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v4f32(
1315 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1316 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
1317 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
1319 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1320 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
1321 ret <2 x float> %shuf
1324 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1325 ; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_format_v4f32(
1326 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1327 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
1328 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
1330 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1331 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
1332 ret <2 x float> %shuf
1335 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1336 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(
1337 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1338 ; CHECK-NEXT: ret <3 x float> [[DATA]]
1340 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1341 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
1342 ret <3 x float> %shuf
1345 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1346 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(
1347 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1348 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
1349 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
1351 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1352 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
1353 ret <3 x float> %shuf
1356 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1357 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(
1358 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1359 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
1360 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
1362 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1363 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
1364 ret <3 x float> %shuf
1367 define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1368 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v3f32(
1369 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1370 ; CHECK-NEXT: ret float [[DATA]]
1372 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1373 %elt0 = extractelement <3 x float> %data, i32 0
1377 define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1378 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v3f32(
1379 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1380 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
1381 ; CHECK-NEXT: ret float [[ELT1]]
1383 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1384 %elt1 = extractelement <3 x float> %data, i32 1
1388 define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1389 ; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v3f32(
1390 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1391 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
1392 ; CHECK-NEXT: ret float [[ELT1]]
1394 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1395 %elt1 = extractelement <3 x float> %data, i32 2
1399 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1400 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v3f32(
1401 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1402 ; CHECK-NEXT: ret <2 x float> [[DATA]]
1404 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1405 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
1406 ret <2 x float> %shuf
1409 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1410 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v3f32(
1411 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1412 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
1413 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
1415 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1416 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
1417 ret <2 x float> %shuf
1420 define i32 @extract0_bitcast_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1421 ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4f32(
1422 ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1423 ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32
1424 ; CHECK-NEXT: ret i32 [[VAR2]]
1426 %var = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1427 %var1 = bitcast <4 x float> %var to <4 x i32>
1428 %var2 = extractelement <4 x i32> %var1, i32 0
1432 define float @extract0_bitcast_raw_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1433 ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4i32(
1434 ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1435 ; CHECK-NEXT: ret float [[VAR]]
1437 %var = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
1438 %var1 = extractelement <4 x float> %var, i32 0
1442 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1443 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(
1444 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]]
1445 ; CHECK-NEXT: ret float [[DATA]]
1447 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
1448 %elt0 = extractelement <2 x float> %data, i32 0
1452 declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32) #1
1453 declare <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32>, i32, i32, i32) #1
1454 declare <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32) #1
1455 declare <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32) #1
1456 declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32) #1
1458 ; --------------------------------------------------------------------
1459 ; llvm.amdgcn.raw.ptr.buffer.load.format
1460 ; --------------------------------------------------------------------
1462 define amdgpu_ps float @raw_ptr_buffer_load_format_f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1463 ; CHECK-LABEL: @raw_ptr_buffer_load_format_f32(
1464 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1465 ; CHECK-NEXT: ret float [[DATA]]
1467 %data = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1471 define amdgpu_ps <1 x float> @raw_ptr_buffer_load_format_v1f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1472 ; CHECK-LABEL: @raw_ptr_buffer_load_format_v1f32(
1473 ; CHECK-NEXT: [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v1f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1474 ; CHECK-NEXT: ret <1 x float> [[DATA]]
1476 %data = call <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v1f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1477 ret <1 x float> %data
1480 define amdgpu_ps <2 x float> @raw_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1481 ; CHECK-LABEL: @raw_ptr_buffer_load_format_v2f32(
1482 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1483 ; CHECK-NEXT: ret <2 x float> [[DATA]]
1485 %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1486 ret <2 x float> %data
1489 define amdgpu_ps <4 x float> @raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1490 ; CHECK-LABEL: @raw_ptr_buffer_load_format_v4f32(
1491 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1492 ; CHECK-NEXT: ret <4 x float> [[DATA]]
1494 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1495 ret <4 x float> %data
1498 define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1499 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_format_v2f32(
1500 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1501 ; CHECK-NEXT: ret float [[DATA]]
1503 %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1504 %elt0 = extractelement <2 x float> %data, i32 0
1508 define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1509 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_format_v2f32(
1510 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1511 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
1512 ; CHECK-NEXT: ret float [[ELT1]]
1514 %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1515 %elt1 = extractelement <2 x float> %data, i32 1
1519 define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1520 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_format_v4f32(
1521 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1522 ; CHECK-NEXT: ret float [[DATA]]
1524 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1525 %elt0 = extractelement <4 x float> %data, i32 0
1529 define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1530 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_format_v4f32(
1531 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1532 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
1533 ; CHECK-NEXT: ret float [[ELT1]]
1535 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1536 %elt1 = extractelement <4 x float> %data, i32 1
1540 define amdgpu_ps float @extract_elt2_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1541 ; CHECK-LABEL: @extract_elt2_raw_ptr_buffer_load_format_v4f32(
1542 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1543 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
1544 ; CHECK-NEXT: ret float [[ELT1]]
1546 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1547 %elt1 = extractelement <4 x float> %data, i32 2
1551 define amdgpu_ps float @extract_elt3_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1552 ; CHECK-LABEL: @extract_elt3_raw_ptr_buffer_load_format_v4f32(
1553 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1554 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
1555 ; CHECK-NEXT: ret float [[ELT1]]
1557 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1558 %elt1 = extractelement <4 x float> %data, i32 3
1562 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1563 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_format_v4f32(
1564 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1565 ; CHECK-NEXT: ret <2 x float> [[DATA]]
1567 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1568 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
1569 ret <2 x float> %shuf
1572 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1573 ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_buffer_load_format_v4f32(
1574 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1575 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
1576 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
1578 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1579 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
1580 ret <2 x float> %shuf
1583 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1584 ; CHECK-LABEL: @extract_elt2_elt3_raw_ptr_buffer_load_format_v4f32(
1585 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1586 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
1587 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
1589 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1590 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
1591 ret <2 x float> %shuf
1594 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1595 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_ptr_buffer_load_format_v4f32(
1596 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1597 ; CHECK-NEXT: ret <3 x float> [[DATA]]
1599 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1600 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
1601 ret <3 x float> %shuf
1604 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1605 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_ptr_buffer_load_format_v4f32(
1606 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1607 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
1608 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
1610 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1611 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
1612 ret <3 x float> %shuf
1615 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1616 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_ptr_buffer_load_format_v4f32(
1617 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1618 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
1619 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
1621 %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1622 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
1623 ret <3 x float> %shuf
1626 define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1627 ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_format_v3f32(
1628 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1629 ; CHECK-NEXT: ret float [[DATA]]
1631 %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1632 %elt0 = extractelement <3 x float> %data, i32 0
1636 define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1637 ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_format_v3f32(
1638 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1639 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
1640 ; CHECK-NEXT: ret float [[ELT1]]
1642 %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1643 %elt1 = extractelement <3 x float> %data, i32 1
1647 define amdgpu_ps float @extract_elt2_raw_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1648 ; CHECK-LABEL: @extract_elt2_raw_ptr_buffer_load_format_v3f32(
1649 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1650 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
1651 ; CHECK-NEXT: ret float [[ELT1]]
1653 %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1654 %elt1 = extractelement <3 x float> %data, i32 2
1658 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1659 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_format_v3f32(
1660 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1661 ; CHECK-NEXT: ret <2 x float> [[DATA]]
1663 %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1664 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
1665 ret <2 x float> %shuf
1668 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1669 ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_buffer_load_format_v3f32(
1670 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1671 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
1672 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
1674 %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1675 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
1676 ret <2 x float> %shuf
1679 define i32 @extract0_bitcast_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1680 ; CHECK-LABEL: @extract0_bitcast_raw_ptr_buffer_load_format_v4f32(
1681 ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1682 ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32
1683 ; CHECK-NEXT: ret i32 [[VAR2]]
1685 %var = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1686 %var1 = bitcast <4 x float> %var to <4 x i32>
1687 %var2 = extractelement <4 x i32> %var1, i32 0
1691 define float @extract0_bitcast_raw_ptr_buffer_load_format_v4i32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1692 ; CHECK-LABEL: @extract0_bitcast_raw_ptr_buffer_load_format_v4i32(
1693 ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1694 ; CHECK-NEXT: ret float [[VAR]]
1696 %var = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0)
1697 %var1 = extractelement <4 x float> %var, i32 0
1701 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
1702 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_ptr_buffer_load_format_v2f32(
1703 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]]
1704 ; CHECK-NEXT: ret float [[DATA]]
1706 %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
1707 %elt0 = extractelement <2 x float> %data, i32 0
1711 declare float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32) #1
1712 declare <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v1f32(ptr addrspace(8), i32, i32, i32) #1
1713 declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32) #1
1714 declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8), i32, i32, i32) #1
1715 declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32) #1
1717 ; --------------------------------------------------------------------
1718 ; llvm.amdgcn.struct.buffer.load
1719 ; --------------------------------------------------------------------
1721 define amdgpu_ps float @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1722 ; CHECK-LABEL: @struct_buffer_load_f32(
1723 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1724 ; CHECK-NEXT: ret float [[DATA]]
1726 %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1730 define amdgpu_ps <1 x float> @struct_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1731 ; CHECK-LABEL: @struct_buffer_load_v1f32(
1732 ; CHECK-NEXT: [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1733 ; CHECK-NEXT: ret <1 x float> [[DATA]]
1735 %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1736 ret <1 x float> %data
1739 define amdgpu_ps <2 x float> @struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1740 ; CHECK-LABEL: @struct_buffer_load_v2f32(
1741 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1742 ; CHECK-NEXT: ret <2 x float> [[DATA]]
1744 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1745 ret <2 x float> %data
1748 define amdgpu_ps <4 x float> @struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1749 ; CHECK-LABEL: @struct_buffer_load_v4f32(
1750 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1751 ; CHECK-NEXT: ret <4 x float> [[DATA]]
1753 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1754 ret <4 x float> %data
1757 define amdgpu_ps float @extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1758 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2f32(
1759 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1760 ; CHECK-NEXT: ret float [[DATA]]
1762 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1763 %elt0 = extractelement <2 x float> %data, i32 0
1767 define amdgpu_ps float @extract_elt1_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1768 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2f32(
1769 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
1770 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1771 ; CHECK-NEXT: ret float [[DATA]]
1773 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1774 %elt1 = extractelement <2 x float> %data, i32 1
1778 define amdgpu_ps float @extract_elt0_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1779 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v4f32(
1780 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1781 ; CHECK-NEXT: ret float [[DATA]]
1783 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1784 %elt0 = extractelement <4 x float> %data, i32 0
1788 define amdgpu_ps float @extract_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1789 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4f32(
1790 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
1791 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1792 ; CHECK-NEXT: ret float [[DATA]]
1794 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1795 %elt1 = extractelement <4 x float> %data, i32 1
1799 define amdgpu_ps float @extract_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1800 ; CHECK-LABEL: @extract_elt2_struct_buffer_load_v4f32(
1801 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
1802 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1803 ; CHECK-NEXT: ret float [[DATA]]
1805 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1806 %elt1 = extractelement <4 x float> %data, i32 2
1810 define amdgpu_ps float @extract_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1811 ; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4f32(
1812 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 12
1813 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1814 ; CHECK-NEXT: ret float [[DATA]]
1816 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1817 %elt1 = extractelement <4 x float> %data, i32 3
1821 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1822 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4f32(
1823 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1824 ; CHECK-NEXT: ret <2 x float> [[DATA]]
1826 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1827 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
1828 ret <2 x float> %shuf
1831 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1832 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v4f32(
1833 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
1834 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1835 ; CHECK-NEXT: ret <2 x float> [[DATA]]
1837 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1838 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
1839 ret <2 x float> %shuf
1842 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1843 ; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_v4f32(
1844 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
1845 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1846 ; CHECK-NEXT: ret <2 x float> [[DATA]]
1848 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1849 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
1850 ret <2 x float> %shuf
1853 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1854 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(
1855 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1856 ; CHECK-NEXT: ret <3 x float> [[DATA]]
1858 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1859 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
1860 ret <3 x float> %shuf
1863 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1864 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(
1865 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
1866 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1867 ; CHECK-NEXT: ret <3 x float> [[DATA]]
1869 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1870 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
1871 ret <3 x float> %shuf
1874 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1875 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(
1876 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1877 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
1878 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
1880 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1881 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
1882 ret <3 x float> %shuf
1885 define amdgpu_ps float @extract_elt0_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1886 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v3f32(
1887 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1888 ; CHECK-NEXT: ret float [[DATA]]
1890 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1891 %elt0 = extractelement <3 x float> %data, i32 0
1895 define amdgpu_ps float @extract_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1896 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3f32(
1897 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
1898 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1899 ; CHECK-NEXT: ret float [[DATA]]
1901 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1902 %elt1 = extractelement <3 x float> %data, i32 1
1906 define amdgpu_ps float @extract_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1907 ; CHECK-LABEL: @extract_elt2_struct_buffer_load_v3f32(
1908 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
1909 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1910 ; CHECK-NEXT: ret float [[DATA]]
1912 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1913 %elt1 = extractelement <3 x float> %data, i32 2
1917 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1918 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v3f32(
1919 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1920 ; CHECK-NEXT: ret <2 x float> [[DATA]]
1922 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1923 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
1924 ret <2 x float> %shuf
1927 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1928 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v3f32(
1929 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
1930 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1931 ; CHECK-NEXT: ret <2 x float> [[DATA]]
1933 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1934 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
1935 ret <2 x float> %shuf
1938 define i32 @extract0_bitcast_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1939 ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4f32(
1940 ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1941 ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32
1942 ; CHECK-NEXT: ret i32 [[VAR2]]
1944 %var = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1945 %var1 = bitcast <4 x float> %var to <4 x i32>
1946 %var2 = extractelement <4 x i32> %var1, i32 0
1950 define float @extract0_bitcast_struct_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1951 ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4i32(
1952 ; CHECK-NEXT: [[VAR:%.*]] = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1953 ; CHECK-NEXT: [[VAR2:%.*]] = bitcast i32 [[VAR]] to float
1954 ; CHECK-NEXT: ret float [[VAR2]]
1956 %var = call <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1957 %var1 = bitcast <4 x i32> %var to <4 x float>
1958 %var2 = extractelement <4 x float> %var1, i32 0
1962 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1963 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(
1964 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]]
1965 ; CHECK-NEXT: ret float [[DATA]]
1967 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
1968 %elt0 = extractelement <2 x float> %data, i32 0
1972 declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) #1
1973 declare <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32>, i32, i32, i32, i32) #1
1974 declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) #1
1975 declare <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32>, i32, i32, i32, i32) #1
1976 declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #1
1978 declare <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #1
1980 define amdgpu_ps half @extract_elt0_struct_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1981 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2f16(
1982 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
1983 ; CHECK-NEXT: ret half [[DATA]]
1985 %data = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1986 %elt0 = extractelement <2 x half> %data, i32 0
1990 define amdgpu_ps half @extract_elt1_struct_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1991 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2f16(
1992 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
1993 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
1994 ; CHECK-NEXT: ret half [[DATA]]
1996 %data = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1997 %elt1 = extractelement <2 x half> %data, i32 1
2001 define amdgpu_ps half @extract_elt1_struct_buffer_load_v3f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2002 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3f16(
2003 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
2004 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2005 ; CHECK-NEXT: ret half [[DATA]]
2007 %data = call <3 x half> @llvm.amdgcn.struct.buffer.load.v3f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2008 %elt1 = extractelement <3 x half> %data, i32 1
2012 define amdgpu_ps half @extract_elt1_struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2013 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4f16(
2014 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
2015 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2016 ; CHECK-NEXT: ret half [[DATA]]
2018 %data = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2019 %elt1 = extractelement <4 x half> %data, i32 1
2023 define amdgpu_ps half @extract_elt3_struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2024 ; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4f16(
2025 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 6
2026 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2027 ; CHECK-NEXT: ret half [[DATA]]
2029 %data = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2030 %elt1 = extractelement <4 x half> %data, i32 3
2034 define amdgpu_ps <2 x half> @extract_elt0_elt1_struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2035 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4f16(
2036 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2037 ; CHECK-NEXT: ret <2 x half> [[DATA]]
2039 %data = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2040 %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1>
2041 ret <2 x half> %shuf
2044 declare half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32>, i32, i32, i32, i32) #1
2045 declare <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32>, i32, i32, i32, i32) #1
2046 declare <3 x half> @llvm.amdgcn.struct.buffer.load.v3f16(<4 x i32>, i32, i32, i32, i32) #1
2047 declare <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32>, i32, i32, i32, i32) #1
2049 define amdgpu_ps i8 @extract_elt0_struct_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2050 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2i8(
2051 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2052 ; CHECK-NEXT: ret i8 [[DATA]]
2054 %data = call <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2055 %elt0 = extractelement <2 x i8> %data, i32 0
2059 define amdgpu_ps i8 @extract_elt1_struct_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2060 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2i8(
2061 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
2062 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2063 ; CHECK-NEXT: ret i8 [[DATA]]
2065 %data = call <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2066 %elt1 = extractelement <2 x i8> %data, i32 1
2070 define amdgpu_ps i8 @extract_elt1_struct_buffer_load_v3i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2071 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3i8(
2072 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
2073 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2074 ; CHECK-NEXT: ret i8 [[DATA]]
2076 %data = call <3 x i8> @llvm.amdgcn.struct.buffer.load.v3i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2077 %elt1 = extractelement <3 x i8> %data, i32 1
2081 define amdgpu_ps i8 @extract_elt1_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2082 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4i8(
2083 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
2084 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2085 ; CHECK-NEXT: ret i8 [[DATA]]
2087 %data = call <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2088 %elt1 = extractelement <4 x i8> %data, i32 1
2092 define amdgpu_ps i8 @extract_elt3_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2093 ; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4i8(
2094 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 3
2095 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2096 ; CHECK-NEXT: ret i8 [[DATA]]
2098 %data = call <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2099 %elt1 = extractelement <4 x i8> %data, i32 3
2103 define amdgpu_ps <2 x i8> @extract_elt0_elt1_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2104 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4i8(
2105 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2106 ; CHECK-NEXT: ret <2 x i8> [[DATA]]
2108 %data = call <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2109 %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1>
2113 declare i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32>, i32, i32, i32, i32) #1
2114 declare <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32>, i32, i32, i32, i32) #1
2115 declare <3 x i8> @llvm.amdgcn.struct.buffer.load.v3i8(<4 x i32>, i32, i32, i32, i32) #1
2116 declare <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32>, i32, i32, i32, i32) #1
2118 ; --------------------------------------------------------------------
2119 ; llvm.amdgcn.struct.ptr.buffer.load
2120 ; --------------------------------------------------------------------
2122 define amdgpu_ps float @struct_ptr_buffer_load_f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2123 ; CHECK-LABEL: @struct_ptr_buffer_load_f32(
2124 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2125 ; CHECK-NEXT: ret float [[DATA]]
2127 %data = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2131 define amdgpu_ps <1 x float> @struct_ptr_buffer_load_v1f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2132 ; CHECK-LABEL: @struct_ptr_buffer_load_v1f32(
2133 ; CHECK-NEXT: [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.v1f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2134 ; CHECK-NEXT: ret <1 x float> [[DATA]]
2136 %data = call <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.v1f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2137 ret <1 x float> %data
2140 define amdgpu_ps <2 x float> @struct_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2141 ; CHECK-LABEL: @struct_ptr_buffer_load_v2f32(
2142 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2143 ; CHECK-NEXT: ret <2 x float> [[DATA]]
2145 %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2146 ret <2 x float> %data
2149 define amdgpu_ps <4 x float> @struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2150 ; CHECK-LABEL: @struct_ptr_buffer_load_v4f32(
2151 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2152 ; CHECK-NEXT: ret <4 x float> [[DATA]]
2154 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2155 ret <4 x float> %data
2158 define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2159 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_v2f32(
2160 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2161 ; CHECK-NEXT: ret float [[DATA]]
2163 %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2164 %elt0 = extractelement <2 x float> %data, i32 0
2168 define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2169 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v2f32(
2170 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
2171 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2172 ; CHECK-NEXT: ret float [[DATA]]
2174 %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2175 %elt1 = extractelement <2 x float> %data, i32 1
2179 define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2180 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_v4f32(
2181 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2182 ; CHECK-NEXT: ret float [[DATA]]
2184 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2185 %elt0 = extractelement <4 x float> %data, i32 0
2189 define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2190 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v4f32(
2191 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
2192 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2193 ; CHECK-NEXT: ret float [[DATA]]
2195 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2196 %elt1 = extractelement <4 x float> %data, i32 1
2200 define amdgpu_ps float @extract_elt2_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2201 ; CHECK-LABEL: @extract_elt2_struct_ptr_buffer_load_v4f32(
2202 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
2203 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2204 ; CHECK-NEXT: ret float [[DATA]]
2206 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2207 %elt1 = extractelement <4 x float> %data, i32 2
2211 define amdgpu_ps float @extract_elt3_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2212 ; CHECK-LABEL: @extract_elt3_struct_ptr_buffer_load_v4f32(
2213 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 12
2214 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2215 ; CHECK-NEXT: ret float [[DATA]]
2217 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2218 %elt1 = extractelement <4 x float> %data, i32 3
2222 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2223 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_v4f32(
2224 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2225 ; CHECK-NEXT: ret <2 x float> [[DATA]]
2227 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2228 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
2229 ret <2 x float> %shuf
2232 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2233 ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_buffer_load_v4f32(
2234 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
2235 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2236 ; CHECK-NEXT: ret <2 x float> [[DATA]]
2238 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2239 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
2240 ret <2 x float> %shuf
2243 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2244 ; CHECK-LABEL: @extract_elt2_elt3_struct_ptr_buffer_load_v4f32(
2245 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
2246 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2247 ; CHECK-NEXT: ret <2 x float> [[DATA]]
2249 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2250 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
2251 ret <2 x float> %shuf
2254 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2255 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_ptr_buffer_load_v4f32(
2256 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2257 ; CHECK-NEXT: ret <3 x float> [[DATA]]
2259 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2260 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
2261 ret <3 x float> %shuf
2264 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2265 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_ptr_buffer_load_v4f32(
2266 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
2267 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2268 ; CHECK-NEXT: ret <3 x float> [[DATA]]
2270 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2271 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
2272 ret <3 x float> %shuf
2275 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2276 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_ptr_buffer_load_v4f32(
2277 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2278 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
2279 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
2281 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2282 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
2283 ret <3 x float> %shuf
2286 define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2287 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_v3f32(
2288 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2289 ; CHECK-NEXT: ret float [[DATA]]
2291 %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2292 %elt0 = extractelement <3 x float> %data, i32 0
2296 define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2297 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v3f32(
2298 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
2299 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2300 ; CHECK-NEXT: ret float [[DATA]]
2302 %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2303 %elt1 = extractelement <3 x float> %data, i32 1
2307 define amdgpu_ps float @extract_elt2_struct_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2308 ; CHECK-LABEL: @extract_elt2_struct_ptr_buffer_load_v3f32(
2309 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8
2310 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2311 ; CHECK-NEXT: ret float [[DATA]]
2313 %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2314 %elt1 = extractelement <3 x float> %data, i32 2
2318 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2319 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_v3f32(
2320 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2321 ; CHECK-NEXT: ret <2 x float> [[DATA]]
2323 %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2324 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
2325 ret <2 x float> %shuf
2328 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2329 ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_buffer_load_v3f32(
2330 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
2331 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2332 ; CHECK-NEXT: ret <2 x float> [[DATA]]
2334 %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2335 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
2336 ret <2 x float> %shuf
2339 define i32 @extract0_bitcast_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2340 ; CHECK-LABEL: @extract0_bitcast_struct_ptr_buffer_load_v4f32(
2341 ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2342 ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32
2343 ; CHECK-NEXT: ret i32 [[VAR2]]
2345 %var = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2346 %var1 = bitcast <4 x float> %var to <4 x i32>
2347 %var2 = extractelement <4 x i32> %var1, i32 0
2351 define float @extract0_bitcast_struct_ptr_buffer_load_v4i32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2352 ; CHECK-LABEL: @extract0_bitcast_struct_ptr_buffer_load_v4i32(
2353 ; CHECK-NEXT: [[VAR:%.*]] = call i32 @llvm.amdgcn.struct.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2354 ; CHECK-NEXT: [[VAR2:%.*]] = bitcast i32 [[VAR]] to float
2355 ; CHECK-NEXT: ret float [[VAR2]]
2357 %var = call <4 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v4i32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2358 %var1 = bitcast <4 x i32> %var to <4 x float>
2359 %var2 = extractelement <4 x float> %var1, i32 0
2363 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2364 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_ptr_buffer_load_v2f32(
2365 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]]
2366 ; CHECK-NEXT: ret float [[DATA]]
2368 %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
2369 %elt0 = extractelement <2 x float> %data, i32 0
2373 declare float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32, i32) #1
2374 declare <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.v1f32(ptr addrspace(8), i32, i32, i32, i32) #1
2375 declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32) #1
2376 declare <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8), i32, i32, i32, i32) #1
2377 declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32) #1
2379 declare <4 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v4i32(ptr addrspace(8), i32, i32, i32, i32) #1
2381 define amdgpu_ps half @extract_elt0_struct_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2382 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_v2f16(
2383 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2384 ; CHECK-NEXT: ret half [[DATA]]
2386 %data = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2387 %elt0 = extractelement <2 x half> %data, i32 0
2391 define amdgpu_ps half @extract_elt1_struct_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2392 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v2f16(
2393 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
2394 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2395 ; CHECK-NEXT: ret half [[DATA]]
2397 %data = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2398 %elt1 = extractelement <2 x half> %data, i32 1
2402 define amdgpu_ps half @extract_elt1_struct_ptr_buffer_load_v3f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2403 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v3f16(
2404 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
2405 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2406 ; CHECK-NEXT: ret half [[DATA]]
2408 %data = call <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.v3f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2409 %elt1 = extractelement <3 x half> %data, i32 1
2413 define amdgpu_ps half @extract_elt1_struct_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2414 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v4f16(
2415 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
2416 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2417 ; CHECK-NEXT: ret half [[DATA]]
2419 %data = call <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2420 %elt1 = extractelement <4 x half> %data, i32 1
2424 define amdgpu_ps half @extract_elt3_struct_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2425 ; CHECK-LABEL: @extract_elt3_struct_ptr_buffer_load_v4f16(
2426 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 6
2427 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2428 ; CHECK-NEXT: ret half [[DATA]]
2430 %data = call <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2431 %elt1 = extractelement <4 x half> %data, i32 3
2435 define amdgpu_ps <2 x half> @extract_elt0_elt1_struct_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2436 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_v4f16(
2437 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2438 ; CHECK-NEXT: ret <2 x half> [[DATA]]
2440 %data = call <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2441 %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1>
2442 ret <2 x half> %shuf
2445 declare half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32, i32) #1
2446 declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32) #1
2447 declare <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.v3f16(ptr addrspace(8), i32, i32, i32, i32) #1
2448 declare <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32) #1
2450 define amdgpu_ps i8 @extract_elt0_struct_ptr_buffer_load_v2i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2451 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_v2i8(
2452 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2453 ; CHECK-NEXT: ret i8 [[DATA]]
2455 %data = call <2 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v2i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2456 %elt0 = extractelement <2 x i8> %data, i32 0
2460 define amdgpu_ps i8 @extract_elt1_struct_ptr_buffer_load_v2i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2461 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v2i8(
2462 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
2463 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2464 ; CHECK-NEXT: ret i8 [[DATA]]
2466 %data = call <2 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v2i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2467 %elt1 = extractelement <2 x i8> %data, i32 1
2471 define amdgpu_ps i8 @extract_elt1_struct_ptr_buffer_load_v3i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2472 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v3i8(
2473 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
2474 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2475 ; CHECK-NEXT: ret i8 [[DATA]]
2477 %data = call <3 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v3i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2478 %elt1 = extractelement <3 x i8> %data, i32 1
2482 define amdgpu_ps i8 @extract_elt1_struct_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2483 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v4i8(
2484 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1
2485 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2486 ; CHECK-NEXT: ret i8 [[DATA]]
2488 %data = call <4 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2489 %elt1 = extractelement <4 x i8> %data, i32 1
2493 define amdgpu_ps i8 @extract_elt3_struct_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2494 ; CHECK-LABEL: @extract_elt3_struct_ptr_buffer_load_v4i8(
2495 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 3
2496 ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0)
2497 ; CHECK-NEXT: ret i8 [[DATA]]
2499 %data = call <4 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2500 %elt1 = extractelement <4 x i8> %data, i32 3
2504 define amdgpu_ps <2 x i8> @extract_elt0_elt1_struct_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2505 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_v4i8(
2506 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v2i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2507 ; CHECK-NEXT: ret <2 x i8> [[DATA]]
2509 %data = call <4 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2510 %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1>
2514 declare i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32, i32) #1
2515 declare <2 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v2i8(ptr addrspace(8), i32, i32, i32, i32) #1
2516 declare <3 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v3i8(ptr addrspace(8), i32, i32, i32, i32) #1
2517 declare <4 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v4i8(ptr addrspace(8), i32, i32, i32, i32) #1
2519 ; --------------------------------------------------------------------
2520 ; llvm.amdgcn.struct.buffer.load.format
2521 ; --------------------------------------------------------------------
2523 define amdgpu_ps float @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2524 ; CHECK-LABEL: @struct_buffer_load_format_f32(
2525 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2526 ; CHECK-NEXT: ret float [[DATA]]
2528 %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2532 define amdgpu_ps <1 x float> @struct_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2533 ; CHECK-LABEL: @struct_buffer_load_format_v1f32(
2534 ; CHECK-NEXT: [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2535 ; CHECK-NEXT: ret <1 x float> [[DATA]]
2537 %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2538 ret <1 x float> %data
2541 define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2542 ; CHECK-LABEL: @struct_buffer_load_format_v2f32(
2543 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2544 ; CHECK-NEXT: ret <2 x float> [[DATA]]
2546 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2547 ret <2 x float> %data
2550 define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2551 ; CHECK-LABEL: @struct_buffer_load_format_v4f32(
2552 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2553 ; CHECK-NEXT: ret <4 x float> [[DATA]]
2555 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2556 ret <4 x float> %data
2559 define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2560 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v2f32(
2561 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2562 ; CHECK-NEXT: ret float [[DATA]]
2564 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2565 %elt0 = extractelement <2 x float> %data, i32 0
2569 define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2570 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v2f32(
2571 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2572 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
2573 ; CHECK-NEXT: ret float [[ELT1]]
2575 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2576 %elt1 = extractelement <2 x float> %data, i32 1
2580 define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2581 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v4f32(
2582 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2583 ; CHECK-NEXT: ret float [[DATA]]
2585 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2586 %elt0 = extractelement <4 x float> %data, i32 0
2590 define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2591 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v4f32(
2592 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2593 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
2594 ; CHECK-NEXT: ret float [[ELT1]]
2596 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2597 %elt1 = extractelement <4 x float> %data, i32 1
2601 define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2602 ; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v4f32(
2603 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2604 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
2605 ; CHECK-NEXT: ret float [[ELT1]]
2607 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2608 %elt1 = extractelement <4 x float> %data, i32 2
2612 define amdgpu_ps float @extract_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2613 ; CHECK-LABEL: @extract_elt3_struct_buffer_load_format_v4f32(
2614 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2615 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
2616 ; CHECK-NEXT: ret float [[ELT1]]
2618 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2619 %elt1 = extractelement <4 x float> %data, i32 3
2623 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2624 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v4f32(
2625 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2626 ; CHECK-NEXT: ret <2 x float> [[DATA]]
2628 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2629 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
2630 ret <2 x float> %shuf
2633 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2634 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v4f32(
2635 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2636 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
2637 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
2639 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2640 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
2641 ret <2 x float> %shuf
2644 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2645 ; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_format_v4f32(
2646 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2647 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
2648 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
2650 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2651 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
2652 ret <2 x float> %shuf
2655 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2656 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(
2657 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2658 ; CHECK-NEXT: ret <3 x float> [[DATA]]
2660 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2661 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
2662 ret <3 x float> %shuf
2665 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2666 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(
2667 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2668 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
2669 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
2671 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2672 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
2673 ret <3 x float> %shuf
2676 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2677 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(
2678 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2679 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
2680 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
2682 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2683 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
2684 ret <3 x float> %shuf
2687 define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2688 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v3f32(
2689 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2690 ; CHECK-NEXT: ret float [[DATA]]
2692 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2693 %elt0 = extractelement <3 x float> %data, i32 0
2697 define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2698 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v3f32(
2699 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2700 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
2701 ; CHECK-NEXT: ret float [[ELT1]]
2703 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2704 %elt1 = extractelement <3 x float> %data, i32 1
2708 define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2709 ; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v3f32(
2710 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2711 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
2712 ; CHECK-NEXT: ret float [[ELT1]]
2714 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2715 %elt1 = extractelement <3 x float> %data, i32 2
2719 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2720 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v3f32(
2721 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2722 ; CHECK-NEXT: ret <2 x float> [[DATA]]
2724 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2725 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
2726 ret <2 x float> %shuf
2729 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2730 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v3f32(
2731 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2732 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
2733 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
2735 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2736 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
2737 ret <2 x float> %shuf
2740 define i32 @extract0_bitcast_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2741 ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_format_v4f32(
2742 ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2743 ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32
2744 ; CHECK-NEXT: ret i32 [[VAR2]]
2746 %var = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2747 %var1 = bitcast <4 x float> %var to <4 x i32>
2748 %var2 = extractelement <4 x i32> %var1, i32 0
2752 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2753 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(
2754 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]]
2755 ; CHECK-NEXT: ret float [[DATA]]
2757 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
2758 %elt0 = extractelement <2 x float> %data, i32 0
2762 declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32) #1
2763 declare <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32>, i32, i32, i32, i32) #1
2764 declare <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32, i32) #1
2765 declare <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32, i32) #1
2766 declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32) #1
2768 declare <4 x i32> @llvm.amdgcn.struct.buffer.load.format.v4i32(<4 x i32>, i32, i32, i32, i32) #1
2770 ; --------------------------------------------------------------------
2771 ; llvm.amdgcn.struct.ptr.buffer.load.format
2772 ; --------------------------------------------------------------------
2774 define amdgpu_ps float @struct_ptr_buffer_load_format_f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2775 ; CHECK-LABEL: @struct_ptr_buffer_load_format_f32(
2776 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2777 ; CHECK-NEXT: ret float [[DATA]]
2779 %data = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2783 define amdgpu_ps <1 x float> @struct_ptr_buffer_load_format_v1f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2784 ; CHECK-LABEL: @struct_ptr_buffer_load_format_v1f32(
2785 ; CHECK-NEXT: [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v1f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2786 ; CHECK-NEXT: ret <1 x float> [[DATA]]
2788 %data = call <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v1f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2789 ret <1 x float> %data
2792 define amdgpu_ps <2 x float> @struct_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2793 ; CHECK-LABEL: @struct_ptr_buffer_load_format_v2f32(
2794 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2795 ; CHECK-NEXT: ret <2 x float> [[DATA]]
2797 %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2798 ret <2 x float> %data
2801 define amdgpu_ps <4 x float> @struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2802 ; CHECK-LABEL: @struct_ptr_buffer_load_format_v4f32(
2803 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2804 ; CHECK-NEXT: ret <4 x float> [[DATA]]
2806 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2807 ret <4 x float> %data
2810 define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2811 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_format_v2f32(
2812 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2813 ; CHECK-NEXT: ret float [[DATA]]
2815 %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2816 %elt0 = extractelement <2 x float> %data, i32 0
2820 define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2821 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_format_v2f32(
2822 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2823 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
2824 ; CHECK-NEXT: ret float [[ELT1]]
2826 %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2827 %elt1 = extractelement <2 x float> %data, i32 1
2831 define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2832 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_format_v4f32(
2833 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2834 ; CHECK-NEXT: ret float [[DATA]]
2836 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2837 %elt0 = extractelement <4 x float> %data, i32 0
2841 define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2842 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_format_v4f32(
2843 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2844 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
2845 ; CHECK-NEXT: ret float [[ELT1]]
2847 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2848 %elt1 = extractelement <4 x float> %data, i32 1
2852 define amdgpu_ps float @extract_elt2_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2853 ; CHECK-LABEL: @extract_elt2_struct_ptr_buffer_load_format_v4f32(
2854 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2855 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
2856 ; CHECK-NEXT: ret float [[ELT1]]
2858 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2859 %elt1 = extractelement <4 x float> %data, i32 2
2863 define amdgpu_ps float @extract_elt3_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2864 ; CHECK-LABEL: @extract_elt3_struct_ptr_buffer_load_format_v4f32(
2865 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2866 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
2867 ; CHECK-NEXT: ret float [[ELT1]]
2869 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2870 %elt1 = extractelement <4 x float> %data, i32 3
2874 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2875 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_format_v4f32(
2876 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2877 ; CHECK-NEXT: ret <2 x float> [[DATA]]
2879 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2880 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
2881 ret <2 x float> %shuf
2884 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2885 ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_buffer_load_format_v4f32(
2886 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2887 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
2888 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
2890 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2891 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
2892 ret <2 x float> %shuf
2895 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2896 ; CHECK-LABEL: @extract_elt2_elt3_struct_ptr_buffer_load_format_v4f32(
2897 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2898 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
2899 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
2901 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2902 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
2903 ret <2 x float> %shuf
2906 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2907 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_ptr_buffer_load_format_v4f32(
2908 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2909 ; CHECK-NEXT: ret <3 x float> [[DATA]]
2911 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2912 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
2913 ret <3 x float> %shuf
2916 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2917 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_ptr_buffer_load_format_v4f32(
2918 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2919 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
2920 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
2922 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2923 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
2924 ret <3 x float> %shuf
2927 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2928 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_ptr_buffer_load_format_v4f32(
2929 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2930 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
2931 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
2933 %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2934 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
2935 ret <3 x float> %shuf
2938 define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2939 ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_format_v3f32(
2940 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2941 ; CHECK-NEXT: ret float [[DATA]]
2943 %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2944 %elt0 = extractelement <3 x float> %data, i32 0
2948 define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2949 ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_format_v3f32(
2950 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2951 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
2952 ; CHECK-NEXT: ret float [[ELT1]]
2954 %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2955 %elt1 = extractelement <3 x float> %data, i32 1
2959 define amdgpu_ps float @extract_elt2_struct_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2960 ; CHECK-LABEL: @extract_elt2_struct_ptr_buffer_load_format_v3f32(
2961 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2962 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
2963 ; CHECK-NEXT: ret float [[ELT1]]
2965 %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2966 %elt1 = extractelement <3 x float> %data, i32 2
2970 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2971 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_format_v3f32(
2972 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2973 ; CHECK-NEXT: ret <2 x float> [[DATA]]
2975 %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2976 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
2977 ret <2 x float> %shuf
2980 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2981 ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_buffer_load_format_v3f32(
2982 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2983 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
2984 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
2986 %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2987 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
2988 ret <2 x float> %shuf
2991 define i32 @extract0_bitcast_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
2992 ; CHECK-LABEL: @extract0_bitcast_struct_ptr_buffer_load_format_v4f32(
2993 ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0)
2994 ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32
2995 ; CHECK-NEXT: ret i32 [[VAR2]]
2997 %var = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
2998 %var1 = bitcast <4 x float> %var to <4 x i32>
2999 %var2 = extractelement <4 x i32> %var1, i32 0
3003 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
3004 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_ptr_buffer_load_format_v2f32(
3005 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]]
3006 ; CHECK-NEXT: ret float [[DATA]]
3008 %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
3009 %elt0 = extractelement <2 x float> %data, i32 0
3013 declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32) #1
3014 declare <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v1f32(ptr addrspace(8), i32, i32, i32, i32) #1
3015 declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32, i32) #1
3016 declare <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8), i32, i32, i32, i32) #1
3017 declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) #1
3019 declare <4 x i32> @llvm.amdgcn.struct.ptr.buffer.load.format.v4i32(ptr addrspace(8), i32, i32, i32, i32) #1
3022 ; --------------------------------------------------------------------
3023 ; llvm.amdgcn.raw.tbuffer.load
3024 ; --------------------------------------------------------------------
3026 define amdgpu_ps float @raw_tbuffer_load_f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3027 ; CHECK-LABEL: @raw_tbuffer_load_f32(
3028 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3029 ; CHECK-NEXT: ret float [[DATA]]
3031 %data = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3035 define amdgpu_ps <2 x float> @raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3036 ; CHECK-LABEL: @raw_tbuffer_load_v2f32(
3037 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3038 ; CHECK-NEXT: ret <2 x float> [[DATA]]
3040 %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3041 ret <2 x float> %data
3044 define amdgpu_ps <4 x float> @raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3045 ; CHECK-LABEL: @raw_tbuffer_load_v4f32(
3046 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3047 ; CHECK-NEXT: ret <4 x float> [[DATA]]
3049 %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3050 ret <4 x float> %data
3053 define amdgpu_ps float @extract_elt0_raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3054 ; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v2f32(
3055 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3056 ; CHECK-NEXT: ret float [[DATA]]
3058 %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3059 %elt0 = extractelement <2 x float> %data, i32 0
3063 define amdgpu_ps float @extract_elt1_raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3064 ; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v2f32(
3065 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3066 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3067 ; CHECK-NEXT: ret float [[ELT1]]
3069 %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3070 %elt1 = extractelement <2 x float> %data, i32 1
3074 define amdgpu_ps float @extract_elt0_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3075 ; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v4f32(
3076 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3077 ; CHECK-NEXT: ret float [[DATA]]
3079 %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3080 %elt0 = extractelement <4 x float> %data, i32 0
3084 define amdgpu_ps float @extract_elt1_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3085 ; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v4f32(
3086 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3087 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3088 ; CHECK-NEXT: ret float [[ELT1]]
3090 %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3091 %elt1 = extractelement <4 x float> %data, i32 1
3095 define amdgpu_ps float @extract_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3096 ; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v4f32(
3097 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3098 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
3099 ; CHECK-NEXT: ret float [[ELT1]]
3101 %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3102 %elt1 = extractelement <4 x float> %data, i32 2
3106 define amdgpu_ps float @extract_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3107 ; CHECK-LABEL: @extract_elt3_raw_tbuffer_load_v4f32(
3108 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3109 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
3110 ; CHECK-NEXT: ret float [[ELT1]]
3112 %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3113 %elt1 = extractelement <4 x float> %data, i32 3
3117 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3118 ; CHECK-LABEL: @extract_elt0_elt1_raw_tbuffer_load_v4f32(
3119 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3120 ; CHECK-NEXT: ret <2 x float> [[DATA]]
3122 %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3123 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
3124 ret <2 x float> %shuf
3127 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3128 ; CHECK-LABEL: @extract_elt1_elt2_raw_tbuffer_load_v4f32(
3129 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3130 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
3131 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
3133 %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3134 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
3135 ret <2 x float> %shuf
3138 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3139 ; CHECK-LABEL: @extract_elt2_elt3_raw_tbuffer_load_v4f32(
3140 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3141 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
3142 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
3144 %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3145 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
3146 ret <2 x float> %shuf
3149 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3150 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_tbuffer_load_v4f32(
3151 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3152 ; CHECK-NEXT: ret <3 x float> [[DATA]]
3154 %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3155 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
3156 ret <3 x float> %shuf
3159 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3160 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_tbuffer_load_v4f32(
3161 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3162 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3163 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
3165 %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3166 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3167 ret <3 x float> %shuf
3170 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3171 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_tbuffer_load_v4f32(
3172 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3173 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3174 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
3176 %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3177 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3178 ret <3 x float> %shuf
3181 define amdgpu_ps float @extract_elt0_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3182 ; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v3f32(
3183 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3184 ; CHECK-NEXT: ret float [[DATA]]
3186 %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3187 %elt0 = extractelement <3 x float> %data, i32 0
3191 define amdgpu_ps float @extract_elt1_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3192 ; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v3f32(
3193 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3194 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3195 ; CHECK-NEXT: ret float [[ELT1]]
3197 %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3198 %elt1 = extractelement <3 x float> %data, i32 1
3202 define amdgpu_ps float @extract_elt2_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3203 ; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v3f32(
3204 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3205 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
3206 ; CHECK-NEXT: ret float [[ELT1]]
3208 %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3209 %elt1 = extractelement <3 x float> %data, i32 2
3213 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3214 ; CHECK-LABEL: @extract_elt0_elt1_raw_tbuffer_load_v3f32(
3215 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3216 ; CHECK-NEXT: ret <2 x float> [[DATA]]
3218 %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3219 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
3220 ret <2 x float> %shuf
3223 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3224 ; CHECK-LABEL: @extract_elt1_elt2_raw_tbuffer_load_v3f32(
3225 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3226 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
3227 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
3229 %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3230 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
3231 ret <2 x float> %shuf
3234 define i32 @extract0_bitcast_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3235 ; CHECK-LABEL: @extract0_bitcast_raw_tbuffer_load_v4f32(
3236 ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3237 ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32
3238 ; CHECK-NEXT: ret i32 [[VAR2]]
3240 %var = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3241 %var1 = bitcast <4 x float> %var to <4 x i32>
3242 %var2 = extractelement <4 x i32> %var1, i32 0
3246 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3247 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_tbuffer_load_v2f32(
3248 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0), !fpmath [[META0]]
3249 ; CHECK-NEXT: ret float [[DATA]]
3251 %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0), !fpmath !0
3252 %elt0 = extractelement <2 x float> %data, i32 0
3256 declare float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32>, i32, i32, i32, i32) #1
3257 declare <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) #1
3258 declare <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32, i32) #1
3259 declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #1
3261 declare <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #1
3263 define amdgpu_ps half @extract_elt3_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3264 ; CHECK-LABEL: @extract_elt3_raw_tbuffer_load_v4f16(
3265 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3266 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x half> [[DATA]], i64 3
3267 ; CHECK-NEXT: ret half [[ELT1]]
3269 %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3270 %elt1 = extractelement <4 x half> %data, i32 3
3274 define amdgpu_ps half @extract_elt2_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3275 ; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v4f16(
3276 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3277 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x half> [[DATA]], i64 2
3278 ; CHECK-NEXT: ret half [[ELT1]]
3280 %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3281 %elt1 = extractelement <4 x half> %data, i32 2
3285 define amdgpu_ps half @extract_elt1_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3286 ; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v4f16(
3287 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3288 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x half> [[DATA]], i64 1
3289 ; CHECK-NEXT: ret half [[ELT1]]
3291 %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3292 %elt1 = extractelement <4 x half> %data, i32 1
3296 define amdgpu_ps half @extract_elt0_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3297 ; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v4f16(
3298 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3299 ; CHECK-NEXT: ret half [[DATA]]
3301 %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3302 %elt1 = extractelement <4 x half> %data, i32 0
3306 declare half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32) #1
3307 declare <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32) #1
3308 declare <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32) #1
3309 declare <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32) #1
3311 ; --------------------------------------------------------------------
3312 ; llvm.amdgcn.raw.ptr.tbuffer.load
3313 ; --------------------------------------------------------------------
3315 define amdgpu_ps float @raw_ptr_tbuffer_load_f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3316 ; CHECK-LABEL: @raw_ptr_tbuffer_load_f32(
3317 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3318 ; CHECK-NEXT: ret float [[DATA]]
3320 %data = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3324 define amdgpu_ps <2 x float> @raw_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3325 ; CHECK-LABEL: @raw_ptr_tbuffer_load_v2f32(
3326 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3327 ; CHECK-NEXT: ret <2 x float> [[DATA]]
3329 %data = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3330 ret <2 x float> %data
3333 define amdgpu_ps <4 x float> @raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3334 ; CHECK-LABEL: @raw_ptr_tbuffer_load_v4f32(
3335 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3336 ; CHECK-NEXT: ret <4 x float> [[DATA]]
3338 %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3339 ret <4 x float> %data
3342 define amdgpu_ps float @extract_elt0_raw_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3343 ; CHECK-LABEL: @extract_elt0_raw_ptr_tbuffer_load_v2f32(
3344 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3345 ; CHECK-NEXT: ret float [[DATA]]
3347 %data = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3348 %elt0 = extractelement <2 x float> %data, i32 0
3352 define amdgpu_ps float @extract_elt1_raw_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3353 ; CHECK-LABEL: @extract_elt1_raw_ptr_tbuffer_load_v2f32(
3354 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3355 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3356 ; CHECK-NEXT: ret float [[ELT1]]
3358 %data = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3359 %elt1 = extractelement <2 x float> %data, i32 1
3363 define amdgpu_ps float @extract_elt0_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3364 ; CHECK-LABEL: @extract_elt0_raw_ptr_tbuffer_load_v4f32(
3365 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3366 ; CHECK-NEXT: ret float [[DATA]]
3368 %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3369 %elt0 = extractelement <4 x float> %data, i32 0
3373 define amdgpu_ps float @extract_elt1_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3374 ; CHECK-LABEL: @extract_elt1_raw_ptr_tbuffer_load_v4f32(
3375 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3376 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3377 ; CHECK-NEXT: ret float [[ELT1]]
3379 %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3380 %elt1 = extractelement <4 x float> %data, i32 1
3384 define amdgpu_ps float @extract_elt2_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3385 ; CHECK-LABEL: @extract_elt2_raw_ptr_tbuffer_load_v4f32(
3386 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3387 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
3388 ; CHECK-NEXT: ret float [[ELT1]]
3390 %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3391 %elt1 = extractelement <4 x float> %data, i32 2
3395 define amdgpu_ps float @extract_elt3_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3396 ; CHECK-LABEL: @extract_elt3_raw_ptr_tbuffer_load_v4f32(
3397 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3398 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
3399 ; CHECK-NEXT: ret float [[ELT1]]
3401 %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3402 %elt1 = extractelement <4 x float> %data, i32 3
3406 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3407 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_tbuffer_load_v4f32(
3408 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3409 ; CHECK-NEXT: ret <2 x float> [[DATA]]
3411 %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3412 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
3413 ret <2 x float> %shuf
3416 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3417 ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_tbuffer_load_v4f32(
3418 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3419 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
3420 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
3422 %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3423 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
3424 ret <2 x float> %shuf
3427 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3428 ; CHECK-LABEL: @extract_elt2_elt3_raw_ptr_tbuffer_load_v4f32(
3429 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3430 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
3431 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
3433 %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3434 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
3435 ret <2 x float> %shuf
3438 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3439 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_ptr_tbuffer_load_v4f32(
3440 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3441 ; CHECK-NEXT: ret <3 x float> [[DATA]]
3443 %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3444 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
3445 ret <3 x float> %shuf
3448 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3449 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_ptr_tbuffer_load_v4f32(
3450 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3451 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3452 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
3454 %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3455 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3456 ret <3 x float> %shuf
3459 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3460 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_ptr_tbuffer_load_v4f32(
3461 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3462 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3463 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
3465 %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3466 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3467 ret <3 x float> %shuf
3470 define amdgpu_ps float @extract_elt0_raw_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3471 ; CHECK-LABEL: @extract_elt0_raw_ptr_tbuffer_load_v3f32(
3472 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3473 ; CHECK-NEXT: ret float [[DATA]]
3475 %data = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3476 %elt0 = extractelement <3 x float> %data, i32 0
3480 define amdgpu_ps float @extract_elt1_raw_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3481 ; CHECK-LABEL: @extract_elt1_raw_ptr_tbuffer_load_v3f32(
3482 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3483 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3484 ; CHECK-NEXT: ret float [[ELT1]]
3486 %data = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3487 %elt1 = extractelement <3 x float> %data, i32 1
3491 define amdgpu_ps float @extract_elt2_raw_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3492 ; CHECK-LABEL: @extract_elt2_raw_ptr_tbuffer_load_v3f32(
3493 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3494 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
3495 ; CHECK-NEXT: ret float [[ELT1]]
3497 %data = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3498 %elt1 = extractelement <3 x float> %data, i32 2
3502 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3503 ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_tbuffer_load_v3f32(
3504 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3505 ; CHECK-NEXT: ret <2 x float> [[DATA]]
3507 %data = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3508 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
3509 ret <2 x float> %shuf
3512 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3513 ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_tbuffer_load_v3f32(
3514 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3515 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
3516 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
3518 %data = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3519 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
3520 ret <2 x float> %shuf
3523 define i32 @extract0_bitcast_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3524 ; CHECK-LABEL: @extract0_bitcast_raw_ptr_tbuffer_load_v4f32(
3525 ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3526 ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32
3527 ; CHECK-NEXT: ret i32 [[VAR2]]
3529 %var = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3530 %var1 = bitcast <4 x float> %var to <4 x i32>
3531 %var2 = extractelement <4 x i32> %var1, i32 0
3535 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3536 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_ptr_tbuffer_load_v2f32(
3537 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0), !fpmath [[META0]]
3538 ; CHECK-NEXT: ret float [[DATA]]
3540 %data = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0), !fpmath !0
3541 %elt0 = extractelement <2 x float> %data, i32 0
3545 declare float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8), i32, i32, i32, i32) #1
3546 declare <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32) #1
3547 declare <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8), i32, i32, i32, i32) #1
3548 declare <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32) #1
3550 declare <4 x i32> @llvm.amdgcn.raw.ptr.tbuffer.load.v4i32(ptr addrspace(8), i32, i32, i32, i32) #1
3552 define amdgpu_ps half @extract_elt3_raw_ptr_tbuffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3553 ; CHECK-LABEL: @extract_elt3_raw_ptr_tbuffer_load_v4f16(
3554 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3555 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x half> [[DATA]], i64 3
3556 ; CHECK-NEXT: ret half [[ELT1]]
3558 %data = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3559 %elt1 = extractelement <4 x half> %data, i32 3
3563 define amdgpu_ps half @extract_elt2_raw_ptr_tbuffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3564 ; CHECK-LABEL: @extract_elt2_raw_ptr_tbuffer_load_v4f16(
3565 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3566 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x half> [[DATA]], i64 2
3567 ; CHECK-NEXT: ret half [[ELT1]]
3569 %data = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3570 %elt1 = extractelement <4 x half> %data, i32 2
3574 define amdgpu_ps half @extract_elt1_raw_ptr_tbuffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3575 ; CHECK-LABEL: @extract_elt1_raw_ptr_tbuffer_load_v4f16(
3576 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3577 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x half> [[DATA]], i64 1
3578 ; CHECK-NEXT: ret half [[ELT1]]
3580 %data = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3581 %elt1 = extractelement <4 x half> %data, i32 1
3585 define amdgpu_ps half @extract_elt0_raw_ptr_tbuffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
3586 ; CHECK-LABEL: @extract_elt0_raw_ptr_tbuffer_load_v4f16(
3587 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.tbuffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0)
3588 ; CHECK-NEXT: ret half [[DATA]]
3590 %data = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
3591 %elt1 = extractelement <4 x half> %data, i32 0
3595 declare half @llvm.amdgcn.raw.ptr.tbuffer.load.f16(ptr addrspace(8), i32, i32, i32, i32) #1
3596 declare <2 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32) #1
3597 declare <3 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f16(ptr addrspace(8), i32, i32, i32, i32) #1
3598 declare <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32) #1
3600 ; --------------------------------------------------------------------
3601 ; llvm.amdgcn.struct.tbuffer.load
3602 ; --------------------------------------------------------------------
3604 define amdgpu_ps float @struct_tbuffer_load_f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3605 ; CHECK-LABEL: @struct_tbuffer_load_f32(
3606 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3607 ; CHECK-NEXT: ret float [[DATA]]
3609 %data = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3613 define amdgpu_ps <2 x float> @struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3614 ; CHECK-LABEL: @struct_tbuffer_load_v2f32(
3615 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3616 ; CHECK-NEXT: ret <2 x float> [[DATA]]
3618 %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3619 ret <2 x float> %data
3622 define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3623 ; CHECK-LABEL: @struct_tbuffer_load_v4f32(
3624 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3625 ; CHECK-NEXT: ret <4 x float> [[DATA]]
3627 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3628 ret <4 x float> %data
3631 define amdgpu_ps float @extract_elt0_struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3632 ; CHECK-LABEL: @extract_elt0_struct_tbuffer_load_v2f32(
3633 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3634 ; CHECK-NEXT: ret float [[DATA]]
3636 %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3637 %elt0 = extractelement <2 x float> %data, i32 0
3641 define amdgpu_ps float @extract_elt1_struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3642 ; CHECK-LABEL: @extract_elt1_struct_tbuffer_load_v2f32(
3643 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3644 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3645 ; CHECK-NEXT: ret float [[ELT1]]
3647 %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3648 %elt1 = extractelement <2 x float> %data, i32 1
3652 define amdgpu_ps float @extract_elt0_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3653 ; CHECK-LABEL: @extract_elt0_struct_tbuffer_load_v4f32(
3654 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3655 ; CHECK-NEXT: ret float [[DATA]]
3657 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3658 %elt0 = extractelement <4 x float> %data, i32 0
3662 define amdgpu_ps float @extract_elt1_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3663 ; CHECK-LABEL: @extract_elt1_struct_tbuffer_load_v4f32(
3664 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3665 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3666 ; CHECK-NEXT: ret float [[ELT1]]
3668 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3669 %elt1 = extractelement <4 x float> %data, i32 1
3673 define amdgpu_ps float @extract_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3674 ; CHECK-LABEL: @extract_elt2_struct_tbuffer_load_v4f32(
3675 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3676 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
3677 ; CHECK-NEXT: ret float [[ELT1]]
3679 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3680 %elt1 = extractelement <4 x float> %data, i32 2
3684 define amdgpu_ps float @extract_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3685 ; CHECK-LABEL: @extract_elt3_struct_tbuffer_load_v4f32(
3686 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3687 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
3688 ; CHECK-NEXT: ret float [[ELT1]]
3690 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3691 %elt1 = extractelement <4 x float> %data, i32 3
3695 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3696 ; CHECK-LABEL: @extract_elt0_elt1_struct_tbuffer_load_v4f32(
3697 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3698 ; CHECK-NEXT: ret <2 x float> [[DATA]]
3700 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3701 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
3702 ret <2 x float> %shuf
3705 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3706 ; CHECK-LABEL: @extract_elt1_elt2_struct_tbuffer_load_v4f32(
3707 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3708 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
3709 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
3711 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3712 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
3713 ret <2 x float> %shuf
3716 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3717 ; CHECK-LABEL: @extract_elt2_elt3_struct_tbuffer_load_v4f32(
3718 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3719 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
3720 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
3722 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3723 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
3724 ret <2 x float> %shuf
3727 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3728 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_tbuffer_load_v4f32(
3729 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3730 ; CHECK-NEXT: ret <3 x float> [[DATA]]
3732 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3733 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
3734 ret <3 x float> %shuf
3737 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3738 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_tbuffer_load_v4f32(
3739 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3740 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3741 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
3743 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3744 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3745 ret <3 x float> %shuf
3748 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3749 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_tbuffer_load_v4f32(
3750 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3751 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3752 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
3754 %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3755 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3756 ret <3 x float> %shuf
3759 define amdgpu_ps float @extract_elt0_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3760 ; CHECK-LABEL: @extract_elt0_struct_tbuffer_load_v3f32(
3761 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3762 ; CHECK-NEXT: ret float [[DATA]]
3764 %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3765 %elt0 = extractelement <3 x float> %data, i32 0
3769 define amdgpu_ps float @extract_elt1_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3770 ; CHECK-LABEL: @extract_elt1_struct_tbuffer_load_v3f32(
3771 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3772 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3773 ; CHECK-NEXT: ret float [[ELT1]]
3775 %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3776 %elt1 = extractelement <3 x float> %data, i32 1
3780 define amdgpu_ps float @extract_elt2_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3781 ; CHECK-LABEL: @extract_elt2_struct_tbuffer_load_v3f32(
3782 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3783 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
3784 ; CHECK-NEXT: ret float [[ELT1]]
3786 %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3787 %elt1 = extractelement <3 x float> %data, i32 2
3791 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3792 ; CHECK-LABEL: @extract_elt0_elt1_struct_tbuffer_load_v3f32(
3793 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3794 ; CHECK-NEXT: ret <2 x float> [[DATA]]
3796 %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3797 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
3798 ret <2 x float> %shuf
3801 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3802 ; CHECK-LABEL: @extract_elt1_elt2_struct_tbuffer_load_v3f32(
3803 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3804 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
3805 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
3807 %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3808 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
3809 ret <2 x float> %shuf
3812 define i32 @extract0_bitcast_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3813 ; CHECK-LABEL: @extract0_bitcast_struct_tbuffer_load_v4f32(
3814 ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3815 ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32
3816 ; CHECK-NEXT: ret i32 [[VAR2]]
3818 %var = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3819 %var1 = bitcast <4 x float> %var to <4 x i32>
3820 %var2 = extractelement <4 x i32> %var1, i32 0
3824 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3825 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_tbuffer_load_v2f32(
3826 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0), !fpmath [[META0]]
3827 ; CHECK-NEXT: ret float [[DATA]]
3829 %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0), !fpmath !0
3830 %elt0 = extractelement <2 x float> %data, i32 0
3834 declare float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32>, i32, i32, i32, i32, i32) #1
3835 declare <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32, i32, i32) #1
3836 declare <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32, i32, i32) #1
3837 declare <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32, i32) #1
3839 declare <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32, i32) #1
3841 ; --------------------------------------------------------------------
3842 ; llvm.amdgcn.struct.ptr.tbuffer.load
3843 ; --------------------------------------------------------------------
3845 define amdgpu_ps float @struct_ptr_tbuffer_load_f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3846 ; CHECK-LABEL: @struct_ptr_tbuffer_load_f32(
3847 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3848 ; CHECK-NEXT: ret float [[DATA]]
3850 %data = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3854 define amdgpu_ps <2 x float> @struct_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3855 ; CHECK-LABEL: @struct_ptr_tbuffer_load_v2f32(
3856 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3857 ; CHECK-NEXT: ret <2 x float> [[DATA]]
3859 %data = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3860 ret <2 x float> %data
3863 define amdgpu_ps <4 x float> @struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3864 ; CHECK-LABEL: @struct_ptr_tbuffer_load_v4f32(
3865 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3866 ; CHECK-NEXT: ret <4 x float> [[DATA]]
3868 %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3869 ret <4 x float> %data
3872 define amdgpu_ps float @extract_elt0_struct_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3873 ; CHECK-LABEL: @extract_elt0_struct_ptr_tbuffer_load_v2f32(
3874 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3875 ; CHECK-NEXT: ret float [[DATA]]
3877 %data = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3878 %elt0 = extractelement <2 x float> %data, i32 0
3882 define amdgpu_ps float @extract_elt1_struct_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3883 ; CHECK-LABEL: @extract_elt1_struct_ptr_tbuffer_load_v2f32(
3884 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3885 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3886 ; CHECK-NEXT: ret float [[ELT1]]
3888 %data = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3889 %elt1 = extractelement <2 x float> %data, i32 1
3893 define amdgpu_ps float @extract_elt0_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3894 ; CHECK-LABEL: @extract_elt0_struct_ptr_tbuffer_load_v4f32(
3895 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3896 ; CHECK-NEXT: ret float [[DATA]]
3898 %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3899 %elt0 = extractelement <4 x float> %data, i32 0
3903 define amdgpu_ps float @extract_elt1_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3904 ; CHECK-LABEL: @extract_elt1_struct_ptr_tbuffer_load_v4f32(
3905 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3906 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
3907 ; CHECK-NEXT: ret float [[ELT1]]
3909 %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3910 %elt1 = extractelement <4 x float> %data, i32 1
3914 define amdgpu_ps float @extract_elt2_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3915 ; CHECK-LABEL: @extract_elt2_struct_ptr_tbuffer_load_v4f32(
3916 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3917 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
3918 ; CHECK-NEXT: ret float [[ELT1]]
3920 %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3921 %elt1 = extractelement <4 x float> %data, i32 2
3925 define amdgpu_ps float @extract_elt3_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3926 ; CHECK-LABEL: @extract_elt3_struct_ptr_tbuffer_load_v4f32(
3927 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3928 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3
3929 ; CHECK-NEXT: ret float [[ELT1]]
3931 %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3932 %elt1 = extractelement <4 x float> %data, i32 3
3936 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3937 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_tbuffer_load_v4f32(
3938 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3939 ; CHECK-NEXT: ret <2 x float> [[DATA]]
3941 %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3942 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
3943 ret <2 x float> %shuf
3946 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3947 ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_tbuffer_load_v4f32(
3948 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3949 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
3950 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
3952 %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3953 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
3954 ret <2 x float> %shuf
3957 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3958 ; CHECK-LABEL: @extract_elt2_elt3_struct_ptr_tbuffer_load_v4f32(
3959 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3960 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
3961 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
3963 %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3964 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
3965 ret <2 x float> %shuf
3968 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3969 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_ptr_tbuffer_load_v4f32(
3970 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3971 ; CHECK-NEXT: ret <3 x float> [[DATA]]
3973 %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3974 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
3975 ret <3 x float> %shuf
3978 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3979 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_ptr_tbuffer_load_v4f32(
3980 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3981 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3982 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
3984 %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3985 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
3986 ret <3 x float> %shuf
3989 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
3990 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_ptr_tbuffer_load_v4f32(
3991 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
3992 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3993 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
3995 %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
3996 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
3997 ret <3 x float> %shuf
4000 define amdgpu_ps float @extract_elt0_struct_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
4001 ; CHECK-LABEL: @extract_elt0_struct_ptr_tbuffer_load_v3f32(
4002 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
4003 ; CHECK-NEXT: ret float [[DATA]]
4005 %data = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
4006 %elt0 = extractelement <3 x float> %data, i32 0
4010 define amdgpu_ps float @extract_elt1_struct_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
4011 ; CHECK-LABEL: @extract_elt1_struct_ptr_tbuffer_load_v3f32(
4012 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
4013 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1
4014 ; CHECK-NEXT: ret float [[ELT1]]
4016 %data = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
4017 %elt1 = extractelement <3 x float> %data, i32 1
4021 define amdgpu_ps float @extract_elt2_struct_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
4022 ; CHECK-LABEL: @extract_elt2_struct_ptr_tbuffer_load_v3f32(
4023 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
4024 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2
4025 ; CHECK-NEXT: ret float [[ELT1]]
4027 %data = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
4028 %elt1 = extractelement <3 x float> %data, i32 2
4032 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
4033 ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_tbuffer_load_v3f32(
4034 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
4035 ; CHECK-NEXT: ret <2 x float> [[DATA]]
4037 %data = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
4038 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
4039 ret <2 x float> %shuf
4042 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
4043 ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_tbuffer_load_v3f32(
4044 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
4045 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
4046 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
4048 %data = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
4049 %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
4050 ret <2 x float> %shuf
4053 define i32 @extract0_bitcast_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
4054 ; CHECK-LABEL: @extract0_bitcast_struct_ptr_tbuffer_load_v4f32(
4055 ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0)
4056 ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32
4057 ; CHECK-NEXT: ret i32 [[VAR2]]
4059 %var = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
4060 %var1 = bitcast <4 x float> %var to <4 x i32>
4061 %var2 = extractelement <4 x i32> %var1, i32 0
4065 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
4066 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_ptr_tbuffer_load_v2f32(
4067 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0), !fpmath [[META0]]
4068 ; CHECK-NEXT: ret float [[DATA]]
4070 %data = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0), !fpmath !0
4071 %elt0 = extractelement <2 x float> %data, i32 0
4075 declare float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8), i32, i32, i32, i32, i32) #1
4076 declare <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32, i32) #1
4077 declare <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8), i32, i32, i32, i32, i32) #1
4078 declare <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32, i32) #1
4080 declare <4 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v4i32(ptr addrspace(8), i32, i32, i32, i32, i32) #1
4082 ; --------------------------------------------------------------------
4083 ; llvm.amdgcn.image.sample
4084 ; --------------------------------------------------------------------
4086 define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4087 ; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32(
4088 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4089 ; CHECK-NEXT: ret float [[DATA]]
4091 %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4092 %elt0 = extractelement <4 x float> %data, i32 0
4096 ; Check that the intrinsic remains unchanged in the presence of TFE or LWE
4097 define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_tfe(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4098 ; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32_tfe(
4099 ; CHECK-NEXT: [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 1, i32 0)
4100 ; CHECK-NEXT: [[DATA_VEC:%.*]] = extractvalue { <4 x float>, i32 } [[DATA]], 0
4101 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA_VEC]], i64 0
4102 ; CHECK-NEXT: ret float [[ELT0]]
4104 %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 1, i32 0)
4105 %data.vec = extractvalue {<4 x float>,i32} %data, 0
4106 %elt0 = extractelement <4 x float> %data.vec, i32 0
4110 ; Check that the intrinsic remains unchanged in the presence of TFE or LWE
4111 define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_lwe(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4112 ; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32_lwe(
4113 ; CHECK-NEXT: [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 2, i32 0)
4114 ; CHECK-NEXT: [[DATA_VEC:%.*]] = extractvalue { <4 x float>, i32 } [[DATA]], 0
4115 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA_VEC]], i64 0
4116 ; CHECK-NEXT: ret float [[ELT0]]
4118 %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 2, i32 0)
4119 %data.vec = extractvalue {<4 x float>,i32} %data, 0
4120 %elt0 = extractelement <4 x float> %data.vec, i32 0
4124 define amdgpu_ps float @extract_elt0_image_sample_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4125 ; CHECK-LABEL: @extract_elt0_image_sample_2d_v4f32_f32(
4126 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.2d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4127 ; CHECK-NEXT: ret float [[DATA]]
4129 %data = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4130 %elt0 = extractelement <4 x float> %data, i32 0
4134 define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(float %s, float %t, float %r, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4135 ; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(
4136 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 0, float [[S:%.*]], float [[T:%.*]], float [[R:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4137 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
4138 ; CHECK-NEXT: ret float [[ELT0]]
4140 %data = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 0, float %s, float %t, float %r, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4141 %elt0 = extractelement <4 x float> %data, i32 0
4145 define amdgpu_ps float @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32(float %s, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4146 ; CHECK-LABEL: @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32(
4147 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1darray.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[SLICE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4148 ; CHECK-NEXT: ret float [[DATA]]
4150 %data = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 1, float %s, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4151 %elt0 = extractelement <4 x float> %data, i32 0
4155 define amdgpu_ps float @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4156 ; CHECK-LABEL: @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32(
4157 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 2, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4158 ; CHECK-NEXT: ret float [[DATA]]
4160 %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 2, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4161 %elt0 = extractelement <4 x float> %data, i32 0
4165 define amdgpu_ps float @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4166 ; CHECK-LABEL: @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32(
4167 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 4, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4168 ; CHECK-NEXT: ret float [[DATA]]
4170 %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 4, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4171 %elt0 = extractelement <4 x float> %data, i32 0
4175 define amdgpu_ps float @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4176 ; CHECK-LABEL: @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32(
4177 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 8, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4178 ; CHECK-NEXT: ret float [[DATA]]
4180 %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 8, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4181 %elt0 = extractelement <4 x float> %data, i32 0
4185 define amdgpu_ps float @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4186 ; CHECK-LABEL: @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32(
4187 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4188 ; CHECK-NEXT: ret float [[DATA]]
4190 %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 9, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4191 %elt0 = extractelement <4 x float> %data, i32 0
4195 define amdgpu_ps float @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4196 ; CHECK-LABEL: @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32(
4197 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4198 ; CHECK-NEXT: ret float [[DATA]]
4200 %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4201 %elt0 = extractelement <4 x float> %data, i32 0
4205 define amdgpu_ps float @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4206 ; CHECK-LABEL: @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(
4207 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4208 ; CHECK-NEXT: ret float [[DATA]]
4210 %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4211 %elt0 = extractelement <4 x float> %data, i32 0
4215 define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4216 ; CHECK-LABEL: @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(
4217 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4218 ; CHECK-NEXT: [[SHUF:%.*]] = insertelement <2 x float> poison, float [[DATA]], i64 0
4219 ; CHECK-NEXT: ret <2 x float> [[SHUF]]
4221 %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4222 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
4223 ret <2 x float> %shuf
4226 define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4227 ; CHECK-LABEL: @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32(
4228 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4229 ; CHECK-NEXT: ret <2 x float> [[DATA]]
4231 %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4232 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
4233 ret <2 x float> %shuf
4236 define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4237 ; CHECK-LABEL: @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32(
4238 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4239 ; CHECK-NEXT: ret <2 x float> [[DATA]]
4241 %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4242 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
4243 ret <2 x float> %shuf
4246 define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4247 ; CHECK-LABEL: @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32(
4248 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 5, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4249 ; CHECK-NEXT: ret <2 x float> [[DATA]]
4251 %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4252 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
4253 ret <2 x float> %shuf
4256 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4257 ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(
4258 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4259 ; CHECK-NEXT: [[SHUF:%.*]] = insertelement <3 x float> poison, float [[DATA]], i64 0
4260 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
4262 %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4263 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
4264 ret <3 x float> %shuf
4267 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4268 ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(
4269 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4270 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[DATA]], <2 x float> poison, <3 x i32> <i32 0, i32 1, i32 poison>
4271 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
4273 %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4274 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
4275 ret <3 x float> %shuf
4278 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4279 ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(
4280 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 5, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4281 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[DATA]], <2 x float> poison, <3 x i32> <i32 0, i32 1, i32 poison>
4282 ; CHECK-NEXT: ret <3 x float> [[SHUF]]
4284 %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4285 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
4286 ret <3 x float> %shuf
4289 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4290 ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32(
4291 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32.v8i32.v4i32(i32 7, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4292 ; CHECK-NEXT: ret <3 x float> [[DATA]]
4294 %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4295 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
4296 ret <3 x float> %shuf
4299 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4300 ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32(
4301 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32.v8i32.v4i32(i32 7, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4302 ; CHECK-NEXT: ret <3 x float> [[DATA]]
4304 %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4305 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
4306 ret <3 x float> %shuf
4309 declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4310 declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4311 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4312 declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4313 declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4315 ; --------------------------------------------------------------------
4316 ; llvm.amdgcn.image.sample.cl
4317 ; --------------------------------------------------------------------
4319 define amdgpu_ps float @extract_elt1_image_sample_cl_2darray_v4f32_f32(float %s, float %t, float %slice, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4320 ; CHECK-LABEL: @extract_elt1_image_sample_cl_2darray_v4f32_f32(
4321 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cl.2darray.f32.f32.v8i32.v4i32(i32 2, float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4322 ; CHECK-NEXT: ret float [[DATA]]
4324 %data = call <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32.v8i32.v4i32(i32 15, float %s, float %t, float %slice, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4325 %elt0 = extractelement <4 x float> %data, i32 1
4329 declare <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4331 ; --------------------------------------------------------------------
4332 ; llvm.amdgcn.image.sample.d
4333 ; --------------------------------------------------------------------
4335 define amdgpu_ps float @extract_elt2_image_sample_d_cube_v4f32_f32_f32(float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4336 ; CHECK-LABEL: @extract_elt2_image_sample_d_cube_v4f32_f32_f32(
4337 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cube.f32.f32.f32.v8i32.v4i32(i32 4, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[FACE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4338 ; CHECK-NEXT: ret float [[DATA]]
4340 %data = call <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4341 %elt0 = extractelement <4 x float> %data, i32 2
4345 declare <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4347 ; --------------------------------------------------------------------
4348 ; llvm.amdgcn.image.sample.d.cl
4349 ; --------------------------------------------------------------------
4351 define amdgpu_ps float @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4352 ; CHECK-LABEL: @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32(
4353 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cl.1d.f32.f32.f32.v8i32.v4i32(i32 8, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4354 ; CHECK-NEXT: ret float [[DATA]]
4356 %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4357 %elt0 = extractelement <4 x float> %data, i32 3
4361 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4363 ; --------------------------------------------------------------------
4364 ; llvm.amdgcn.image.sample.l
4365 ; --------------------------------------------------------------------
4367 define amdgpu_ps float @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32(float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4368 ; CHECK-LABEL: @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32(
4369 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.l.1d.f32.f32.v8i32.v4i32(i32 4, float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4370 ; CHECK-NEXT: ret float [[DATA]]
4372 %data = call <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32.v8i32.v4i32(i32 6, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4373 %elt0 = extractelement <2 x float> %data, i32 1
4377 declare <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4379 ; --------------------------------------------------------------------
4380 ; llvm.amdgcn.image.sample.b
4381 ; --------------------------------------------------------------------
4383 define amdgpu_ps float @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32(float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4384 ; CHECK-LABEL: @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32(
4385 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.1d.f32.f32.f32.v8i32.v4i32(i32 8, float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4386 ; CHECK-NEXT: ret float [[DATA]]
4388 %data = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 9, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4389 %elt0 = extractelement <4 x float> %data, i32 1
4393 declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4395 ; --------------------------------------------------------------------
4396 ; llvm.amdgcn.image.sample.b.cl
4397 ; --------------------------------------------------------------------
4399 define amdgpu_ps <2 x float> @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32(float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4400 ; CHECK-LABEL: @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32(
4401 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.b.cl.1d.v2f32.f32.f32.v8i32.v4i32(i32 12, float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4402 ; CHECK-NEXT: ret <2 x float> [[DATA]]
4404 %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 13, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4405 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
4406 ret <2 x float> %shuf
4409 declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4411 ; --------------------------------------------------------------------
4412 ; llvm.amdgcn.image.sample.lz
4413 ; --------------------------------------------------------------------
4415 define amdgpu_ps <2 x float> @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4416 ; CHECK-LABEL: @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32(
4417 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.lz.1d.v2f32.f32.v8i32.v4i32(i32 10, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4418 ; CHECK-NEXT: ret <2 x float> [[DATA]]
4420 %data = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4421 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 3>
4422 ret <2 x float> %shuf
4425 declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4427 ; --------------------------------------------------------------------
4428 ; llvm.amdgcn.image.sample.cd
4429 ; --------------------------------------------------------------------
4431 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4432 ; CHECK-LABEL: @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32(
4433 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.cd.1d.v3f32.f32.f32.v8i32.v4i32(i32 14, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4434 ; CHECK-NEXT: ret <3 x float> [[DATA]]
4436 %data = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4437 %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
4438 ret <3 x float> %shuf
4441 declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4443 ; --------------------------------------------------------------------
4444 ; llvm.amdgcn.image.sample.cd.cl
4445 ; --------------------------------------------------------------------
4447 define amdgpu_ps half @extract_elt3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4448 ; CHECK-LABEL: @extract_elt3_image_sample_cd_cl_1d_v4f16_f32_f32(
4449 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 8, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4450 ; CHECK-NEXT: ret half [[DATA]]
4452 %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4453 %elt0 = extractelement <4 x half> %data, i32 3
4457 define amdgpu_ps half @extract_elt2_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4458 ; CHECK-LABEL: @extract_elt2_image_sample_cd_cl_1d_v4f16_f32_f32(
4459 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 4, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4460 ; CHECK-NEXT: ret half [[DATA]]
4462 %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4463 %elt0 = extractelement <4 x half> %data, i32 2
4467 define amdgpu_ps half @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4468 ; CHECK-LABEL: @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32(
4469 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 2, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4470 ; CHECK-NEXT: ret half [[DATA]]
4472 %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4473 %elt0 = extractelement <4 x half> %data, i32 1
4477 define amdgpu_ps <4 x half> @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4478 ; CHECK-LABEL: @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(
4479 ; CHECK-NEXT: [[DATA:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v3f16.f32.f32.v8i32.v4i32(i32 7, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4480 ; CHECK-NEXT: [[RES:%.*]] = shufflevector <3 x half> [[DATA]], <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
4481 ; CHECK-NEXT: ret <4 x half> [[RES]]
4483 %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4484 %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
4488 define amdgpu_ps <4 x half> @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4489 ; CHECK-LABEL: @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32(
4490 ; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v2f16.f32.f32.v8i32.v4i32(i32 3, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4491 ; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x half> [[DATA]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
4492 ; CHECK-NEXT: ret <4 x half> [[RES]]
4494 %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4495 %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
4499 define amdgpu_ps <4 x half> @extract_elt_to1_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4500 ; CHECK-LABEL: @extract_elt_to1_image_sample_cd_cl_1d_v4f16_f32_f32(
4501 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 1, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4502 ; CHECK-NEXT: [[RES:%.*]] = insertelement <4 x half> poison, half [[DATA]], i64 0
4503 ; CHECK-NEXT: ret <4 x half> [[RES]]
4505 %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4506 %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
4510 define amdgpu_ps half @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4511 ; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32(
4512 ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 1, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4513 ; CHECK-NEXT: ret half [[DATA]]
4515 %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4516 %elt0 = extractelement <4 x half> %data, i32 0
4520 declare <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4522 ; --------------------------------------------------------------------
4523 ; llvm.amdgcn.image.sample.c
4524 ; --------------------------------------------------------------------
4526 define amdgpu_ps float @extract_elt0_image_sample_c_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4527 ; CHECK-LABEL: @extract_elt0_image_sample_c_1d_v4f32_f32(
4528 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4529 ; CHECK-NEXT: ret float [[DATA]]
4531 %data = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4532 %elt0 = extractelement <4 x float> %data, i32 0
4536 declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4538 ; --------------------------------------------------------------------
4539 ; llvm.amdgcn.image.sample.c.cl
4540 ; --------------------------------------------------------------------
4542 define amdgpu_ps float @extract_elt0_image_sample_c_cl_1d_v4f32_f32(float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4543 ; CHECK-LABEL: @extract_elt0_image_sample_c_cl_1d_v4f32_f32(
4544 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cl.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4545 ; CHECK-NEXT: ret float [[DATA]]
4547 %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4548 %elt0 = extractelement <4 x float> %data, i32 0
4552 declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4554 ; --------------------------------------------------------------------
4555 ; llvm.amdgcn.image.sample.c.d
4556 ; --------------------------------------------------------------------
4558 define amdgpu_ps float @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4559 ; CHECK-LABEL: @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32(
4560 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4561 ; CHECK-NEXT: ret float [[DATA]]
4563 %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4564 %elt0 = extractelement <4 x float> %data, i32 0
4568 declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4570 ; --------------------------------------------------------------------
4571 ; llvm.amdgcn.image.sample.c.d.cl
4572 ; --------------------------------------------------------------------
4574 define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4575 ; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32(
4576 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.cl.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4577 ; CHECK-NEXT: ret float [[DATA]]
4579 %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4580 %elt0 = extractelement <4 x float> %data, i32 0
4584 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4586 ; --------------------------------------------------------------------
4587 ; llvm.amdgcn.image.sample.c.l
4588 ; --------------------------------------------------------------------
4590 define amdgpu_ps float @extract_elt0_image_sample_c_l_1d_v4f32_f32(float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4591 ; CHECK-LABEL: @extract_elt0_image_sample_c_l_1d_v4f32_f32(
4592 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.l.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4593 ; CHECK-NEXT: ret float [[DATA]]
4595 %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4596 %elt0 = extractelement <4 x float> %data, i32 0
4600 declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4602 ; --------------------------------------------------------------------
4603 ; llvm.amdgcn.image.sample.c.b
4604 ; --------------------------------------------------------------------
4606 define amdgpu_ps float @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4607 ; CHECK-LABEL: @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32(
4608 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4609 ; CHECK-NEXT: ret float [[DATA]]
4611 %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4612 %elt0 = extractelement <4 x float> %data, i32 0
4616 declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4618 ; --------------------------------------------------------------------
4619 ; llvm.amdgcn.image.sample.c.b.cl
4620 ; --------------------------------------------------------------------
4622 define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4623 ; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32(
4624 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.cl.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4625 ; CHECK-NEXT: ret float [[DATA]]
4627 %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4628 %elt0 = extractelement <4 x float> %data, i32 0
4632 declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4634 ; --------------------------------------------------------------------
4635 ; llvm.amdgcn.image.sample.c.lz
4636 ; --------------------------------------------------------------------
4638 define amdgpu_ps float @extract_elt0_image_sample_c_lz_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4639 ; CHECK-LABEL: @extract_elt0_image_sample_c_lz_1d_v4f32_f32(
4640 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.lz.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4641 ; CHECK-NEXT: ret float [[DATA]]
4643 %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4644 %elt0 = extractelement <4 x float> %data, i32 0
4648 declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4650 ; --------------------------------------------------------------------
4651 ; llvm.amdgcn.image.sample.c.cd
4652 ; --------------------------------------------------------------------
4654 define amdgpu_ps float @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4655 ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32(
4656 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4657 ; CHECK-NEXT: ret float [[DATA]]
4659 %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4660 %elt0 = extractelement <4 x float> %data, i32 0
4664 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4666 ; --------------------------------------------------------------------
4667 ; llvm.amdgcn.image.sample.c.cd.cl
4668 ; --------------------------------------------------------------------
4670 define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4671 ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32(
4672 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.cl.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4673 ; CHECK-NEXT: ret float [[DATA]]
4675 %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4676 %elt0 = extractelement <4 x float> %data, i32 0
4680 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4682 ; --------------------------------------------------------------------
4683 ; llvm.amdgcn.image.sample.o
4684 ; --------------------------------------------------------------------
4686 define amdgpu_ps float @extract_elt0_image_sample_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4687 ; CHECK-LABEL: @extract_elt0_image_sample_o_1d_v4f32_f32(
4688 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4689 ; CHECK-NEXT: ret float [[DATA]]
4691 %data = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4692 %elt0 = extractelement <4 x float> %data, i32 0
4696 declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4698 ; --------------------------------------------------------------------
4699 ; llvm.amdgcn.image.sample.cl.o
4700 ; --------------------------------------------------------------------
4702 define amdgpu_ps float @extract_elt0_image_sample_cl_o_1d_v4f32_f32(i32 %offset, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4703 ; CHECK-LABEL: @extract_elt0_image_sample_cl_o_1d_v4f32_f32(
4704 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cl.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4705 ; CHECK-NEXT: ret float [[DATA]]
4707 %data = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4708 %elt0 = extractelement <4 x float> %data, i32 0
4712 declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4714 ; --------------------------------------------------------------------
4715 ; llvm.amdgcn.image.sample.d.o
4716 ; --------------------------------------------------------------------
4718 define amdgpu_ps float @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4719 ; CHECK-LABEL: @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32(
4720 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4721 ; CHECK-NEXT: ret float [[DATA]]
4723 %data = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4724 %elt0 = extractelement <4 x float> %data, i32 0
4728 declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4730 ; --------------------------------------------------------------------
4731 ; llvm.amdgcn.image.sample.d.cl.o
4732 ; --------------------------------------------------------------------
4734 define amdgpu_ps float @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4735 ; CHECK-LABEL: @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32(
4736 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4737 ; CHECK-NEXT: ret float [[DATA]]
4739 %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4740 %elt0 = extractelement <4 x float> %data, i32 0
4744 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4746 ; --------------------------------------------------------------------
4747 ; llvm.amdgcn.image.sample.l.o
4748 ; --------------------------------------------------------------------
4750 define amdgpu_ps float @extract_elt0_image_sample_l_o_1d_v4f32_f32(i32 %offset, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4751 ; CHECK-LABEL: @extract_elt0_image_sample_l_o_1d_v4f32_f32(
4752 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.l.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4753 ; CHECK-NEXT: ret float [[DATA]]
4755 %data = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4756 %elt0 = extractelement <4 x float> %data, i32 0
4760 declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4762 ; --------------------------------------------------------------------
4763 ; llvm.amdgcn.image.sample.b.o
4764 ; --------------------------------------------------------------------
4766 define amdgpu_ps float @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4767 ; CHECK-LABEL: @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32(
4768 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4769 ; CHECK-NEXT: ret float [[DATA]]
4771 %data = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4772 %elt0 = extractelement <4 x float> %data, i32 0
4776 declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4778 ; --------------------------------------------------------------------
4779 ; llvm.amdgcn.image.sample.b.cl.o
4780 ; --------------------------------------------------------------------
4782 define amdgpu_ps float @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4783 ; CHECK-LABEL: @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32(
4784 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4785 ; CHECK-NEXT: ret float [[DATA]]
4787 %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4788 %elt0 = extractelement <4 x float> %data, i32 0
4792 declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4794 ; --------------------------------------------------------------------
4795 ; llvm.amdgcn.image.sample.lz.o
4796 ; --------------------------------------------------------------------
4798 define amdgpu_ps float @extract_elt0_image_sample_lz_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4799 ; CHECK-LABEL: @extract_elt0_image_sample_lz_o_1d_v4f32_f32(
4800 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.lz.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4801 ; CHECK-NEXT: ret float [[DATA]]
4803 %data = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4804 %elt0 = extractelement <4 x float> %data, i32 0
4808 declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4810 ; --------------------------------------------------------------------
4811 ; llvm.amdgcn.image.sample.cd.o
4812 ; --------------------------------------------------------------------
4814 define amdgpu_ps float @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4815 ; CHECK-LABEL: @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32(
4816 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cd.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4817 ; CHECK-NEXT: ret float [[DATA]]
4819 %data = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4820 %elt0 = extractelement <4 x float> %data, i32 0
4824 declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4826 ; --------------------------------------------------------------------
4827 ; llvm.amdgcn.image.sample.cd.cl.o
4828 ; --------------------------------------------------------------------
4830 define amdgpu_ps float @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4831 ; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32(
4832 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cd.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4833 ; CHECK-NEXT: ret float [[DATA]]
4835 %data = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4836 %elt0 = extractelement <4 x float> %data, i32 0
4840 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4842 ; --------------------------------------------------------------------
4843 ; llvm.amdgcn.image.sample.c.o
4844 ; --------------------------------------------------------------------
4846 define amdgpu_ps float @extract_elt0_image_sample_c_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4847 ; CHECK-LABEL: @extract_elt0_image_sample_c_o_1d_v4f32_f32(
4848 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4849 ; CHECK-NEXT: ret float [[DATA]]
4851 %data = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4852 %elt0 = extractelement <4 x float> %data, i32 0
4856 declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4858 ; --------------------------------------------------------------------
4859 ; llvm.amdgcn.image.sample.c.cl.o
4860 ; --------------------------------------------------------------------
4862 define amdgpu_ps float @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4863 ; CHECK-LABEL: @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32(
4864 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cl.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4865 ; CHECK-NEXT: ret float [[DATA]]
4867 %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4868 %elt0 = extractelement <4 x float> %data, i32 0
4872 declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4874 ; --------------------------------------------------------------------
4875 ; llvm.amdgcn.image.sample.c.d.o
4876 ; --------------------------------------------------------------------
4878 define amdgpu_ps float @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4879 ; CHECK-LABEL: @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32(
4880 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4881 ; CHECK-NEXT: ret float [[DATA]]
4883 %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4884 %elt0 = extractelement <4 x float> %data, i32 0
4888 declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4890 ; --------------------------------------------------------------------
4891 ; llvm.amdgcn.image.sample.c.d.cl.o
4892 ; --------------------------------------------------------------------
4894 define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4895 ; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32(
4896 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4897 ; CHECK-NEXT: ret float [[DATA]]
4899 %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4900 %elt0 = extractelement <4 x float> %data, i32 0
4904 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4906 ; --------------------------------------------------------------------
4907 ; llvm.amdgcn.image.sample.c.l.o
4908 ; --------------------------------------------------------------------
4910 define amdgpu_ps float @extract_elt0_image_sample_c_l_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4911 ; CHECK-LABEL: @extract_elt0_image_sample_c_l_o_1d_v4f32_f32(
4912 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.l.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4913 ; CHECK-NEXT: ret float [[DATA]]
4915 %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4916 %elt0 = extractelement <4 x float> %data, i32 0
4920 declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4922 ; --------------------------------------------------------------------
4923 ; llvm.amdgcn.image.sample.c.b.o
4924 ; --------------------------------------------------------------------
4926 define amdgpu_ps float @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4927 ; CHECK-LABEL: @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32(
4928 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4929 ; CHECK-NEXT: ret float [[DATA]]
4931 %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4932 %elt0 = extractelement <4 x float> %data, i32 0
4936 declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4938 ; --------------------------------------------------------------------
4939 ; llvm.amdgcn.image.sample.c.b.cl.o
4940 ; --------------------------------------------------------------------
4942 define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4943 ; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32(
4944 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4945 ; CHECK-NEXT: ret float [[DATA]]
4947 %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4948 %elt0 = extractelement <4 x float> %data, i32 0
4952 declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4954 ; --------------------------------------------------------------------
4955 ; llvm.amdgcn.image.sample.c.lz.o
4956 ; --------------------------------------------------------------------
4958 define amdgpu_ps float @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4959 ; CHECK-LABEL: @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32(
4960 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.lz.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4961 ; CHECK-NEXT: ret float [[DATA]]
4963 %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4964 %elt0 = extractelement <4 x float> %data, i32 0
4968 declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4970 ; --------------------------------------------------------------------
4971 ; llvm.amdgcn.image.sample.c.cd.o
4972 ; --------------------------------------------------------------------
4974 define amdgpu_ps float @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4975 ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32(
4976 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4977 ; CHECK-NEXT: ret float [[DATA]]
4979 %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4980 %elt0 = extractelement <4 x float> %data, i32 0
4984 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
4986 ; --------------------------------------------------------------------
4987 ; llvm.amdgcn.image.sample.c.cd.cl.o
4988 ; --------------------------------------------------------------------
4990 define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4991 ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32(
4992 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
4993 ; CHECK-NEXT: ret float [[DATA]]
4995 %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4996 %elt0 = extractelement <4 x float> %data, i32 0
5000 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5002 ; --------------------------------------------------------------------
5003 ; llvm.amdgcn.image.gather4
5004 ; --------------------------------------------------------------------
5006 ; Don't handle gather4*
5008 define amdgpu_ps float @extract_elt0_image_gather4_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
5009 ; CHECK-LABEL: @extract_elt0_image_gather4_2d_v4f32_f32(
5010 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5011 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5012 ; CHECK-NEXT: ret float [[ELT0]]
5014 %data = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32.v8i32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5015 %elt0 = extractelement <4 x float> %data, i32 0
5019 declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32.v8i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5021 ; --------------------------------------------------------------------
5022 ; llvm.amdgcn.image.gather4.cl
5023 ; --------------------------------------------------------------------
5025 define amdgpu_ps float @extract_elt0_image_gather4_cl_2d_v4f32_f32(float %s, float %t, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
5026 ; CHECK-LABEL: @extract_elt0_image_gather4_cl_2d_v4f32_f32(
5027 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32.v8i32.v4i32(i32 2, float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5028 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5029 ; CHECK-NEXT: ret float [[ELT0]]
5031 %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32.v8i32(i32 2, float %s, float %t, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5032 %elt0 = extractelement <4 x float> %data, i32 0
5036 declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32.v8i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5038 ; --------------------------------------------------------------------
5039 ; llvm.amdgcn.image.gather4.l
5040 ; --------------------------------------------------------------------
5042 define amdgpu_ps float @extract_elt0_image_gather4_l_2d_v4f32_f32(float %s, float %t, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
5043 ; CHECK-LABEL: @extract_elt0_image_gather4_l_2d_v4f32_f32(
5044 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32.v4i32(i32 4, float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5045 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5046 ; CHECK-NEXT: ret float [[ELT0]]
5048 %data = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32(i32 4, float %s, float %t, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5049 %elt0 = extractelement <4 x float> %data, i32 0
5053 declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5055 ; --------------------------------------------------------------------
5056 ; llvm.amdgcn.image.gather4.b
5057 ; --------------------------------------------------------------------
5059 define amdgpu_ps float @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32(float %bias, float %s, float %t, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
5060 ; CHECK-LABEL: @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32(
5061 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32.v8i32.v4i32(i32 8, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5062 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5063 ; CHECK-NEXT: ret float [[ELT0]]
5065 %data = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32.v8i32(i32 8, float %bias, float %s, float %t, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5066 %elt0 = extractelement <4 x float> %data, i32 0
5070 declare <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32.v8i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5072 ; --------------------------------------------------------------------
5073 ; llvm.amdgcn.image.gather4.b.cl
5074 ; --------------------------------------------------------------------
5076 define amdgpu_ps float @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32(float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
5077 ; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32(
5078 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[FACE:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5079 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5080 ; CHECK-NEXT: ret float [[ELT0]]
5082 %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32.v8i32(i32 1, float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5083 %elt0 = extractelement <4 x float> %data, i32 0
5087 declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32.v8i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5089 ; --------------------------------------------------------------------
5090 ; llvm.amdgcn.image.gather4.lz
5091 ; --------------------------------------------------------------------
5093 define amdgpu_ps float @extract_elt0_image_gather4_lz_2d_v4f32_f16(half %s, half %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
5094 ; CHECK-LABEL: @extract_elt0_image_gather4_lz_2d_v4f32_f16(
5095 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16.v8i32.v4i32(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5096 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5097 ; CHECK-NEXT: ret float [[ELT0]]
5099 %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16.v8i32(i32 1, half %s, half %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5100 %elt0 = extractelement <4 x float> %data, i32 0
5104 declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16.v8i32(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5106 ; --------------------------------------------------------------------
5107 ; llvm.amdgcn.image.gather4.o
5108 ; --------------------------------------------------------------------
5110 define amdgpu_ps float @extract_elt0_image_gather4_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5111 ; CHECK-LABEL: @extract_elt0_image_gather4_o_2d_v4f32_f32(
5112 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5113 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5114 ; CHECK-NEXT: ret float [[ELT0]]
5116 %data = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5117 %elt0 = extractelement <4 x float> %data, i32 0
5121 declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32.v8i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5123 ; --------------------------------------------------------------------
5124 ; llvm.amdgcn.image.gather4.cl.o
5125 ; --------------------------------------------------------------------
5127 define amdgpu_ps float @extract_elt0_image_gather4_cl_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5128 ; CHECK-LABEL: @extract_elt0_image_gather4_cl_o_2d_v4f32_f32(
5129 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5130 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5131 ; CHECK-NEXT: ret float [[ELT0]]
5133 %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5134 %elt0 = extractelement <4 x float> %data, i32 0
5138 declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5140 ; --------------------------------------------------------------------
5141 ; llvm.amdgcn.image.gather4.l.o
5142 ; --------------------------------------------------------------------
5144 define amdgpu_ps float @extract_elt0_image_gather4_l_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5145 ; CHECK-LABEL: @extract_elt0_image_gather4_l_o_2d_v4f32_f32(
5146 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5147 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5148 ; CHECK-NEXT: ret float [[ELT0]]
5150 %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5151 %elt0 = extractelement <4 x float> %data, i32 0
5155 declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5157 ; --------------------------------------------------------------------
5158 ; llvm.amdgcn.image.gather4.b.o
5159 ; --------------------------------------------------------------------
5161 define amdgpu_ps float @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5162 ; CHECK-LABEL: @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32(
5163 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5164 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5165 ; CHECK-NEXT: ret float [[ELT0]]
5167 %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5168 %elt0 = extractelement <4 x float> %data, i32 0
5172 declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5174 ; --------------------------------------------------------------------
5175 ; llvm.amdgcn.image.gather4.b.cl.o
5176 ; --------------------------------------------------------------------
5178 define amdgpu_ps float @extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5179 ; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32(
5180 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5181 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5182 ; CHECK-NEXT: ret float [[ELT0]]
5184 %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5185 %elt0 = extractelement <4 x float> %data, i32 0
5189 declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5191 ; --------------------------------------------------------------------
5192 ; llvm.amdgcn.image.gather4.lz.o
5193 ; --------------------------------------------------------------------
5195 define amdgpu_ps float @extract_elt0_image_gather4_lz_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5196 ; CHECK-LABEL: @extract_elt0_image_gather4_lz_o_2d_v4f32_f32(
5197 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5198 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5199 ; CHECK-NEXT: ret float [[ELT0]]
5201 %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5202 %elt0 = extractelement <4 x float> %data, i32 0
5206 declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32.v8i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5208 ; --------------------------------------------------------------------
5209 ; llvm.amdgcn.image.gather4.c.o
5210 ; --------------------------------------------------------------------
5212 define amdgpu_ps float @extract_elt0_image_gather4_c_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5213 ; CHECK-LABEL: @extract_elt0_image_gather4_c_o_2d_v4f32_f32(
5214 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5215 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5216 ; CHECK-NEXT: ret float [[ELT0]]
5218 %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5219 %elt0 = extractelement <4 x float> %data, i32 0
5223 declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5225 ; --------------------------------------------------------------------
5226 ; llvm.amdgcn.image.gather4.c.cl.o
5227 ; --------------------------------------------------------------------
5229 define amdgpu_ps float @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5230 ; CHECK-LABEL: @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32(
5231 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5232 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5233 ; CHECK-NEXT: ret float [[ELT0]]
5235 %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5236 %elt0 = extractelement <4 x float> %data, i32 0
5240 declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5242 ; --------------------------------------------------------------------
5243 ; llvm.amdgcn.image.gather4.c.l.o
5244 ; --------------------------------------------------------------------
5246 define amdgpu_ps float @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5247 ; CHECK-LABEL: @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32(
5248 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5249 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5250 ; CHECK-NEXT: ret float [[ELT0]]
5252 %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5253 %elt0 = extractelement <4 x float> %data, i32 0
5257 declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5259 ; --------------------------------------------------------------------
5260 ; llvm.amdgcn.image.gather4.c.b.o
5261 ; --------------------------------------------------------------------
5263 define amdgpu_ps float @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5264 ; CHECK-LABEL: @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32(
5265 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5266 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5267 ; CHECK-NEXT: ret float [[ELT0]]
5269 %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5270 %elt0 = extractelement <4 x float> %data, i32 0
5274 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5276 ; --------------------------------------------------------------------
5277 ; llvm.amdgcn.image.gather4.c.b.cl.o
5278 ; --------------------------------------------------------------------
5280 define amdgpu_ps float @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5281 ; CHECK-LABEL: @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32(
5282 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5283 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5284 ; CHECK-NEXT: ret float [[ELT0]]
5286 %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5287 %elt0 = extractelement <4 x float> %data, i32 0
5291 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5293 ; --------------------------------------------------------------------
5294 ; llvm.amdgcn.image.gather4.c.lz.o
5295 ; --------------------------------------------------------------------
5297 define amdgpu_ps float @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
5298 ; CHECK-LABEL: @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32(
5299 ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5300 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
5301 ; CHECK-NEXT: ret float [[ELT0]]
5303 %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5304 %elt0 = extractelement <4 x float> %data, i32 0
5308 declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5310 ; --------------------------------------------------------------------
5311 ; llvm.amdgcn.image.getlod
5312 ; --------------------------------------------------------------------
5314 define amdgpu_ps float @extract_elt0_image_getlod_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
5315 ; CHECK-LABEL: @extract_elt0_image_getlod_1d_v4f32_f32(
5316 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.getlod.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
5317 ; CHECK-NEXT: ret float [[DATA]]
5319 %data = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32.v8i32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
5320 %elt0 = extractelement <4 x float> %data, i32 0
5324 declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32.v8i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
5326 ; --------------------------------------------------------------------
5327 ; llvm.amdgcn.image.load
5328 ; --------------------------------------------------------------------
5330 define amdgpu_ps float @extract_elt0_image_load_2dmsaa_v4f32_i32(i32 %s, i32 %t, i32 %sample, <8 x i32> inreg %sampler) #0 {
5331 ; CHECK-LABEL: @extract_elt0_image_load_2dmsaa_v4f32_i32(
5332 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SAMPLE:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0)
5333 ; CHECK-NEXT: ret float [[DATA]]
5335 %data = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32.v8i32(i32 15, i32 %s, i32 %t, i32 %sample, <8 x i32> %sampler, i32 0, i32 0)
5336 %elt0 = extractelement <4 x float> %data, i32 0
5340 declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
5342 ; --------------------------------------------------------------------
5343 ; llvm.amdgcn.image.load.mip
5344 ; --------------------------------------------------------------------
5346 define amdgpu_ps float @extract_elt0_image_load_mip_1d_v4f32_i32(i32 %s, i32 %mip, <8 x i32> inreg %sampler) #0 {
5347 ; CHECK-LABEL: @extract_elt0_image_load_mip_1d_v4f32_i32(
5348 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.load.mip.1d.f32.i32.v8i32(i32 1, i32 [[S:%.*]], i32 [[MIP:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0)
5349 ; CHECK-NEXT: ret float [[DATA]]
5351 %data = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 15, i32 %s, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
5352 %elt0 = extractelement <4 x float> %data, i32 0
5356 declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32, i32, i32, <8 x i32>, i32, i32) #1
5358 ; --------------------------------------------------------------------
5359 ; llvm.amdgcn.image.getresinfo
5360 ; --------------------------------------------------------------------
5362 define amdgpu_ps float @extract_elt0_image_getresinfo_1d_v4f32_i32(i32 %mip, <8 x i32> inreg %sampler) #0 {
5363 ; CHECK-LABEL: @extract_elt0_image_getresinfo_1d_v4f32_i32(
5364 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.getresinfo.1d.f32.i32.v8i32(i32 1, i32 [[MIP:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0)
5365 ; CHECK-NEXT: ret float [[DATA]]
5367 %data = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32.v8i32(i32 15, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
5368 %elt0 = extractelement <4 x float> %data, i32 0
5372 declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #1
5374 ; --------------------------------------------------------------------
5376 ; --------------------------------------------------------------------
5378 define amdgpu_ps float @extract_elt0_tfe_image_load_1d_v4f32i32_i32(i32 %s, <8 x i32> inreg %rsrc) #0 {
5379 ; CHECK-LABEL: @extract_elt0_tfe_image_load_1d_v4f32i32_i32(
5380 ; CHECK-NEXT: [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32.v8i32(i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 1)
5381 ; CHECK-NEXT: [[RGBA:%.*]] = extractvalue { <4 x float>, i32 } [[DATA]], 0
5382 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[RGBA]], i64 0
5383 ; CHECK-NEXT: ret float [[ELT0]]
5385 %data = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32.v8i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
5386 %rgba = extractvalue { <4 x float>, i32 } %data, 0
5387 %elt0 = extractelement <4 x float> %rgba, i32 0
5391 declare {<4 x float>, i32} @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #1
5393 define amdgpu_hs float @tfe_check_assert() #0 {
5394 ; CHECK-LABEL: @tfe_check_assert(
5395 ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1)
5396 ; CHECK-NEXT: ret float [[DATA]]
5398 %data = call nsz <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 15, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1) #2
5399 %elt0 = extractelement <4 x float> %data, i32 0
5403 declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1
5405 attributes #0 = { nounwind }
5406 attributes #1 = { nounwind readonly }
5408 !0 = !{float 2.500000e+00}