; Test the bitcast operation for big-endian and little-endian.

; RUN: llc -march=mips -mattr=+msa,+fp64,+mips32r2 < %s | FileCheck -check-prefix=BIGENDIAN %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64,+mips32r2 < %s | FileCheck -check-prefix=LITENDIAN %s
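
; Explanatory note (not a FileCheck directive): each function below loads a
; vector, operates on it in the source element type, bitcasts it, operates on
; it in the destination element type, and stores the result. On little-endian
; targets the bitcast is expected to be free, so the two operations follow
; each other directly. On big-endian targets a shf.[bhw] is expected between
; them whenever the element size changes. For reference, the 8-bit shf
; immediate packs four 2-bit source indices (element 0 in the low bits) that
; are applied to every group of four elements: 27 (0b00011011) selects
; 3,2,1,0 (reverse the group), 177 (0b10110001) selects 1,0,3,2 (swap
; adjacent pairs).
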
define void @v16i8_to_v16i8(<16 x i8>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <16 x i8>, <16 x i8>* %src
  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
  %2 = bitcast <16 x i8> %1 to <16 x i8>
  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
  store <16 x i8> %3, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v16i8_to_v16i8:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v16i8_to_v16i8

; BIGENDIAN: v16i8_to_v16i8:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.b [[R3]],
; BIGENDIAN: .size v16i8_to_v16i8

define void @v16i8_to_v8i16(<16 x i8>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <16 x i8>, <16 x i8>* %src
  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
  %2 = bitcast <16 x i8> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
  store <8 x i16> %3, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v16i8_to_v8i16:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.h [[R3]],
; LITENDIAN: .size v16i8_to_v8i16

; BIGENDIAN: v16i8_to_v8i16:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.h [[R4]],
; BIGENDIAN: .size v16i8_to_v8i16

; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
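; For example (illustrative only): the combine folds
;   (store (bitcast <16 x i8> %1 to <8 x half>), <8 x half>* %dst)
; into a plain <16 x i8> store of %1, which is why only ld.b/addv.b/st.b are
; checked below and no shf.* is expected even on big-endian.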
define void @v16i8_to_v8f16(<16 x i8>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <16 x i8>, <16 x i8>* %src
  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
  %2 = bitcast <16 x i8> %1 to <8 x half>
  store <8 x half> %2, <8 x half>* %dst
  ret void
}

; LITENDIAN: v16i8_to_v8f16:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.b [[R2]],
; LITENDIAN: .size v16i8_to_v8f16

; BIGENDIAN: v16i8_to_v8f16:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.b [[R2]],
; BIGENDIAN: .size v16i8_to_v8f16

define void @v16i8_to_v4i32(<16 x i8>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <16 x i8>, <16 x i8>* %src
  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
  %2 = bitcast <16 x i8> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
  store <4 x i32> %3, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v16i8_to_v4i32:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v16i8_to_v4i32

; BIGENDIAN: v16i8_to_v4i32:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v16i8_to_v4i32

define void @v16i8_to_v4f32(<16 x i8>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <16 x i8>, <16 x i8>* %src
  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
  %2 = bitcast <16 x i8> %1 to <4 x float>
  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
  store <4 x float> %3, <4 x float>* %dst
  ret void
}

; LITENDIAN: v16i8_to_v4f32:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v16i8_to_v4f32

; BIGENDIAN: v16i8_to_v4f32:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v16i8_to_v4f32

define void @v16i8_to_v2i64(<16 x i8>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <16 x i8>, <16 x i8>* %src
  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
  %2 = bitcast <16 x i8> %1 to <2 x i64>
  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
  store <2 x i64> %3, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v16i8_to_v2i64:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v16i8_to_v2i64

; BIGENDIAN: v16i8_to_v2i64:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177
; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v16i8_to_v2i64

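; Note (explanatory only): shf.df permutes elements within groups of four, so
; reversing the eight bytes of each doubleword above takes two steps: shf.b 27
; reverses the bytes within each word, then shf.w 177 swaps the two words of
; each doubleword.
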
define void @v16i8_to_v2f64(<16 x i8>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <16 x i8>, <16 x i8>* %src
  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
  %2 = bitcast <16 x i8> %1 to <2 x double>
  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
  store <2 x double> %3, <2 x double>* %dst
  ret void
}

; LITENDIAN: v16i8_to_v2f64:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v16i8_to_v2f64

; BIGENDIAN: v16i8_to_v2f64:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177
; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v16i8_to_v2f64

define void @v8i16_to_v16i8(<8 x i16>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <8 x i16>, <8 x i16>* %src
  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
  %2 = bitcast <8 x i16> %1 to <16 x i8>
  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
  store <16 x i8> %3, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v8i16_to_v16i8:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v8i16_to_v16i8

; BIGENDIAN: v8i16_to_v16i8:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.b [[R4]],
; BIGENDIAN: .size v8i16_to_v16i8

define void @v8i16_to_v8i16(<8 x i16>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <8 x i16>, <8 x i16>* %src
  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
  %2 = bitcast <8 x i16> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
  store <8 x i16> %3, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v8i16_to_v8i16:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.h [[R3]],
; LITENDIAN: .size v8i16_to_v8i16

; BIGENDIAN: v8i16_to_v8i16:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.h [[R3]],
; BIGENDIAN: .size v8i16_to_v8i16

; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8i16_to_v8f16(<8 x i16>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <8 x i16>, <8 x i16>* %src
  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
  %2 = bitcast <8 x i16> %1 to <8 x half>
  store <8 x half> %2, <8 x half>* %dst
  ret void
}

; LITENDIAN: v8i16_to_v8f16:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.h [[R2]],
; LITENDIAN: .size v8i16_to_v8f16

; BIGENDIAN: v8i16_to_v8f16:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.h [[R2]],
; BIGENDIAN: .size v8i16_to_v8f16

define void @v8i16_to_v4i32(<8 x i16>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <8 x i16>, <8 x i16>* %src
  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
  %2 = bitcast <8 x i16> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
  store <4 x i32> %3, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v8i16_to_v4i32:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v8i16_to_v4i32

; BIGENDIAN: v8i16_to_v4i32:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v8i16_to_v4i32

define void @v8i16_to_v4f32(<8 x i16>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <8 x i16>, <8 x i16>* %src
  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
  %2 = bitcast <8 x i16> %1 to <4 x float>
  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
  store <4 x float> %3, <4 x float>* %dst
  ret void
}

; LITENDIAN: v8i16_to_v4f32:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v8i16_to_v4f32

; BIGENDIAN: v8i16_to_v4f32:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v8i16_to_v4f32

define void @v8i16_to_v2i64(<8 x i16>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <8 x i16>, <8 x i16>* %src
  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
  %2 = bitcast <8 x i16> %1 to <2 x i64>
  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
  store <2 x i64> %3, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v8i16_to_v2i64:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v8i16_to_v2i64

; BIGENDIAN: v8i16_to_v2i64:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v8i16_to_v2i64

define void @v8i16_to_v2f64(<8 x i16>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <8 x i16>, <8 x i16>* %src
  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
  %2 = bitcast <8 x i16> %1 to <2 x double>
  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
  store <2 x double> %3, <2 x double>* %dst
  ret void
}

; LITENDIAN: v8i16_to_v2f64:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v8i16_to_v2f64

; BIGENDIAN: v8i16_to_v2f64:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v8i16_to_v2f64

; We can't prevent the (bitcast (load X)) DAG Combine here because there
; are no operations for v8f16 to put in the way.
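; For example (illustrative only): (bitcast <8 x half> (load %src) to
; <16 x i8>) is folded into a <16 x i8> load, so the code below is checked for
; a load in the destination element size (ld.b here, ld.w/ld.d in the later
; v8f16 cases) with no shf.* even on big-endian.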
define void @v8f16_to_v16i8(<8 x half>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <8 x half>, <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <16 x i8>
  %2 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %1, <16 x i8> %1)
  store <16 x i8> %2, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v8f16_to_v16i8:
; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.b [[R2]],
; LITENDIAN: .size v8f16_to_v16i8

; BIGENDIAN: v8f16_to_v16i8:
; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.b [[R2]],
; BIGENDIAN: .size v8f16_to_v16i8

; We can't prevent the (bitcast (load X)) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v8i16(<8 x half>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <8 x half>, <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <8 x i16>
  %2 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %1, <8 x i16> %1)
  store <8 x i16> %2, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v8f16_to_v8i16:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.h [[R2]],
; LITENDIAN: .size v8f16_to_v8i16

; BIGENDIAN: v8f16_to_v8i16:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.h [[R2]],
; BIGENDIAN: .size v8f16_to_v8i16

; We can't prevent the (bitcast (load X)) DAG Combine here because there
; are no operations for v8f16 to put in the way.
; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v8f16(<8 x half>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <8 x half>, <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <8 x half>
  store <8 x half> %1, <8 x half>* %dst
  ret void
}

; LITENDIAN: v8f16_to_v8f16:
; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
; LITENDIAN: st.h [[R1]],
; LITENDIAN: .size v8f16_to_v8f16

; BIGENDIAN: v8f16_to_v8f16:
; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
; BIGENDIAN: st.h [[R1]],
; BIGENDIAN: .size v8f16_to_v8f16

; We can't prevent the (bitcast (load X)) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v4i32(<8 x half>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <8 x half>, <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <4 x i32>
  %2 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %1, <4 x i32> %1)
  store <4 x i32> %2, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v8f16_to_v4i32:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.w [[R2]],
; LITENDIAN: .size v8f16_to_v4i32

; BIGENDIAN: v8f16_to_v4i32:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.w [[R2]],
; BIGENDIAN: .size v8f16_to_v4i32

; We can't prevent the (bitcast (load X)) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v4f32(<8 x half>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <8 x half>, <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <4 x float>
  %2 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %1, <4 x float> %1)
  store <4 x float> %2, <4 x float>* %dst
  ret void
}

; LITENDIAN: v8f16_to_v4f32:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.w [[R2]],
; LITENDIAN: .size v8f16_to_v4f32

; BIGENDIAN: v8f16_to_v4f32:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.w [[R2]],
; BIGENDIAN: .size v8f16_to_v4f32

; We can't prevent the (bitcast (load X)) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v2i64(<8 x half>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <8 x half>, <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <2 x i64>
  %2 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %1, <2 x i64> %1)
  store <2 x i64> %2, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v8f16_to_v2i64:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.d [[R2]],
; LITENDIAN: .size v8f16_to_v2i64

; BIGENDIAN: v8f16_to_v2i64:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.d [[R2]],
; BIGENDIAN: .size v8f16_to_v2i64

; We can't prevent the (bitcast (load X)) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v2f64(<8 x half>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <8 x half>, <8 x half>* %src
  %1 = bitcast <8 x half> %0 to <2 x double>
  %2 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %1, <2 x double> %1)
  store <2 x double> %2, <2 x double>* %dst
  ret void
}

; LITENDIAN: v8f16_to_v2f64:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.d [[R2]],
; LITENDIAN: .size v8f16_to_v2f64

; BIGENDIAN: v8f16_to_v2f64:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.d [[R2]],
; BIGENDIAN: .size v8f16_to_v2f64

define void @v4i32_to_v16i8(<4 x i32>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <4 x i32>, <4 x i32>* %src
  %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
  %2 = bitcast <4 x i32> %1 to <16 x i8>
  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
  store <16 x i8> %3, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v4i32_to_v16i8:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v4i32_to_v16i8

; BIGENDIAN: v4i32_to_v16i8:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.b [[R4]],
; BIGENDIAN: .size v4i32_to_v16i8

define void @v4i32_to_v8i16(<4 x i32>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <4 x i32>, <4 x i32>* %src
  %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
  %2 = bitcast <4 x i32> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
  store <8 x i16> %3, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v4i32_to_v8i16:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.h [[R3]],
; LITENDIAN: .size v4i32_to_v8i16

; BIGENDIAN: v4i32_to_v8i16:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.h [[R4]],
; BIGENDIAN: .size v4i32_to_v8i16

; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v4i32_to_v8f16(<4 x i32>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <4 x i32>, <4 x i32>* %src
  %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
  %2 = bitcast <4 x i32> %1 to <8 x half>
  store <8 x half> %2, <8 x half>* %dst
  ret void
}

; LITENDIAN: v4i32_to_v8f16:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.w [[R2]],
; LITENDIAN: .size v4i32_to_v8f16

; BIGENDIAN: v4i32_to_v8f16:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.w [[R2]],
; BIGENDIAN: .size v4i32_to_v8f16

define void @v4i32_to_v4i32(<4 x i32>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <4 x i32>, <4 x i32>* %src
  %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
  %2 = bitcast <4 x i32> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
  store <4 x i32> %3, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v4i32_to_v4i32:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v4i32_to_v4i32

; BIGENDIAN: v4i32_to_v4i32:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.w [[R3]],
; BIGENDIAN: .size v4i32_to_v4i32

define void @v4i32_to_v4f32(<4 x i32>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <4 x i32>, <4 x i32>* %src
  %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
  %2 = bitcast <4 x i32> %1 to <4 x float>
  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
  store <4 x float> %3, <4 x float>* %dst
  ret void
}

; LITENDIAN: v4i32_to_v4f32:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v4i32_to_v4f32

; BIGENDIAN: v4i32_to_v4f32:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.w [[R3]],
; BIGENDIAN: .size v4i32_to_v4f32

define void @v4i32_to_v2i64(<4 x i32>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <4 x i32>, <4 x i32>* %src
  %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
  %2 = bitcast <4 x i32> %1 to <2 x i64>
  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
  store <2 x i64> %3, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v4i32_to_v2i64:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v4i32_to_v2i64

; BIGENDIAN: v4i32_to_v2i64:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v4i32_to_v2i64

define void @v4i32_to_v2f64(<4 x i32>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <4 x i32>, <4 x i32>* %src
  %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
  %2 = bitcast <4 x i32> %1 to <2 x double>
  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
  store <2 x double> %3, <2 x double>* %dst
  ret void
}

; LITENDIAN: v4i32_to_v2f64:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v4i32_to_v2f64

; BIGENDIAN: v4i32_to_v2f64:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v4i32_to_v2f64

define void @v4f32_to_v16i8(<4 x float>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <4 x float>, <4 x float>* %src
  %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
  %2 = bitcast <4 x float> %1 to <16 x i8>
  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
  store <16 x i8> %3, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v4f32_to_v16i8:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v4f32_to_v16i8

; BIGENDIAN: v4f32_to_v16i8:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.b [[R4]],
; BIGENDIAN: .size v4f32_to_v16i8

define void @v4f32_to_v8i16(<4 x float>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <4 x float>, <4 x float>* %src
  %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
  %2 = bitcast <4 x float> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
  store <8 x i16> %3, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v4f32_to_v8i16:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.h [[R3]],
; LITENDIAN: .size v4f32_to_v8i16

; BIGENDIAN: v4f32_to_v8i16:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.h [[R4]],
; BIGENDIAN: .size v4f32_to_v8i16

; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v4f32_to_v8f16(<4 x float>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <4 x float>, <4 x float>* %src
  %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
  %2 = bitcast <4 x float> %1 to <8 x half>
  store <8 x half> %2, <8 x half>* %dst
  ret void
}

; LITENDIAN: v4f32_to_v8f16:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.w [[R2]],
; LITENDIAN: .size v4f32_to_v8f16

; BIGENDIAN: v4f32_to_v8f16:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.w [[R2]],
; BIGENDIAN: .size v4f32_to_v8f16

define void @v4f32_to_v4i32(<4 x float>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <4 x float>, <4 x float>* %src
  %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
  %2 = bitcast <4 x float> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
  store <4 x i32> %3, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v4f32_to_v4i32:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v4f32_to_v4i32

; BIGENDIAN: v4f32_to_v4i32:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.w [[R3]],
; BIGENDIAN: .size v4f32_to_v4i32

define void @v4f32_to_v4f32(<4 x float>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <4 x float>, <4 x float>* %src
  %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
  %2 = bitcast <4 x float> %1 to <4 x float>
  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
  store <4 x float> %3, <4 x float>* %dst
  ret void
}

; LITENDIAN: v4f32_to_v4f32:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v4f32_to_v4f32

; BIGENDIAN: v4f32_to_v4f32:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.w [[R3]],
; BIGENDIAN: .size v4f32_to_v4f32

define void @v4f32_to_v2i64(<4 x float>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <4 x float>, <4 x float>* %src
  %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
  %2 = bitcast <4 x float> %1 to <2 x i64>
  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
  store <2 x i64> %3, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v4f32_to_v2i64:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v4f32_to_v2i64

; BIGENDIAN: v4f32_to_v2i64:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v4f32_to_v2i64

define void @v4f32_to_v2f64(<4 x float>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <4 x float>, <4 x float>* %src
  %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
  %2 = bitcast <4 x float> %1 to <2 x double>
  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
  store <2 x double> %3, <2 x double>* %dst
  ret void
}

; LITENDIAN: v4f32_to_v2f64:
; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v4f32_to_v2f64

; BIGENDIAN: v4f32_to_v2f64:
; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.d [[R4]],
; BIGENDIAN: .size v4f32_to_v2f64

define void @v2i64_to_v16i8(<2 x i64>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>, <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <16 x i8>
  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
  store <16 x i8> %3, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v16i8:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v2i64_to_v16i8

; BIGENDIAN: v2i64_to_v16i8:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177
; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.b [[R4]],
; BIGENDIAN: .size v2i64_to_v16i8

define void @v2i64_to_v8i16(<2 x i64>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>, <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
  store <8 x i16> %3, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v8i16:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.h [[R3]],
; LITENDIAN: .size v2i64_to_v8i16

; BIGENDIAN: v2i64_to_v8i16:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.h [[R4]],
; BIGENDIAN: .size v2i64_to_v8i16

; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v2i64_to_v8f16(<2 x i64>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>, <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <8 x half>
  store <8 x half> %2, <8 x half>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v8f16:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.d [[R2]],
; LITENDIAN: .size v2i64_to_v8f16

; BIGENDIAN: v2i64_to_v8f16:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.d [[R2]],
; BIGENDIAN: .size v2i64_to_v8f16

define void @v2i64_to_v4i32(<2 x i64>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>, <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
  store <4 x i32> %3, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v4i32:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v2i64_to_v4i32

; BIGENDIAN: v2i64_to_v4i32:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v2i64_to_v4i32

define void @v2i64_to_v4f32(<2 x i64>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>, <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <4 x float>
  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
  store <4 x float> %3, <4 x float>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v4f32:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v2i64_to_v4f32

; BIGENDIAN: v2i64_to_v4f32:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v2i64_to_v4f32

define void @v2i64_to_v2i64(<2 x i64>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>, <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <2 x i64>
  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
  store <2 x i64> %3, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v2i64:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v2i64_to_v2i64

; BIGENDIAN: v2i64_to_v2i64:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v2i64_to_v2i64

define void @v2i64_to_v2f64(<2 x i64>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>, <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <2 x double>
  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
  store <2 x double> %3, <2 x double>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v2f64:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v2i64_to_v2f64

; BIGENDIAN: v2i64_to_v2f64:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v2i64_to_v2f64

define void @v2f64_to_v16i8(<2 x double>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>, <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <16 x i8>
  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
  store <16 x i8> %3, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v16i8:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v2f64_to_v16i8

; BIGENDIAN: v2f64_to_v16i8:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177
; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.b [[R4]],
; BIGENDIAN: .size v2f64_to_v16i8

define void @v2f64_to_v8i16(<2 x double>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>, <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
  store <8 x i16> %3, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v8i16:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.h [[R3]],
; LITENDIAN: .size v2f64_to_v8i16

; BIGENDIAN: v2f64_to_v8i16:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.h [[R4]],
; BIGENDIAN: .size v2f64_to_v8i16

; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v2f64_to_v8f16(<2 x double>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>, <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <8 x half>
  store <8 x half> %2, <8 x half>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v8f16:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.d [[R2]],
; LITENDIAN: .size v2f64_to_v8f16

; BIGENDIAN: v2f64_to_v8f16:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.d [[R2]],
; BIGENDIAN: .size v2f64_to_v8f16

define void @v2f64_to_v4i32(<2 x double>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>, <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
  store <4 x i32> %3, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v4i32:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v2f64_to_v4i32

; BIGENDIAN: v2f64_to_v4i32:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v2f64_to_v4i32

define void @v2f64_to_v4f32(<2 x double>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>, <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <4 x float>
  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
  store <4 x float> %3, <4 x float>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v4f32:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v2f64_to_v4f32

; BIGENDIAN: v2f64_to_v4f32:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v2f64_to_v4f32

define void @v2f64_to_v2i64(<2 x double>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>, <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <2 x i64>
  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
  store <2 x i64> %3, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v2i64:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v2f64_to_v2i64

; BIGENDIAN: v2f64_to_v2i64:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v2f64_to_v2i64

define void @v2f64_to_v2f64(<2 x double>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>, <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <2 x double>
  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
  store <2 x double> %3, <2 x double>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v2f64:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v2f64_to_v2f64

; BIGENDIAN: v2f64_to_v2f64:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v2f64_to_v2f64

declare <16 x i8> @llvm.mips.addv.b(<16 x i8>, <16 x i8>) nounwind
declare <8 x i16> @llvm.mips.addv.h(<8 x i16>, <8 x i16>) nounwind
declare <4 x i32> @llvm.mips.addv.w(<4 x i32>, <4 x i32>) nounwind
declare <2 x i64> @llvm.mips.addv.d(<2 x i64>, <2 x i64>) nounwind
declare <4 x float> @llvm.mips.fadd.w(<4 x float>, <4 x float>) nounwind
declare <2 x double> @llvm.mips.fadd.d(<2 x double>, <2 x double>) nounwind