; Code-generation test for vector loads and stores whose declared alignment
; is smaller than the vector size, on the powerpc64-unknown-linux-gnu triple.
; Each function is compiled with llc and its assembly is matched by FileCheck.
1 ; RUN: llc -verify-machineinstrs < %s | FileCheck %s
2 target datalayout = "E-m:e-i64:64-n32:64"
3 target triple = "powerpc64-unknown-linux-gnu"
; pwr7 (attribute group #0): load of <16 x i8> with align 1.
; Expected: two lvx loads (index 0 and an index register holding 15) and an
; lvsl result, combined by a vperm that writes register 2.
5 define <16 x i8> @test_l_v16i8(<16 x i8>* %p) #0 {
7 %r = load <16 x i8>, <16 x i8>* %p, align 1
10 ; CHECK-LABEL: @test_l_v16i8
11 ; CHECK-DAG: li [[REG1:[0-9]+]], 15
12 ; CHECK-DAG: lvsl [[REG2:[0-9]+]], 0, 3
13 ; CHECK-DAG: lvx [[REG3:[0-9]+]], 3, [[REG1]]
14 ; CHECK-DAG: lvx [[REG4:[0-9]+]], 0, 3
15 ; CHECK: vperm 2, [[REG4]], [[REG3]], [[REG2]]
; pwr7 (attribute group #0): load of <32 x i8> with align 1.
; Expected: three lvx loads (index 0, and index registers holding 16 and 31)
; and one lvsl result shared by two vperm instructions that write registers
; 3 and 2. The vperm source operands are not pinned to specific lvx results.
19 define <32 x i8> @test_l_v32i8(<32 x i8>* %p) #0 {
21 %r = load <32 x i8>, <32 x i8>* %p, align 1
24 ; CHECK-LABEL: @test_l_v32i8
25 ; CHECK-DAG: li [[REG1:[0-9]+]], 31
26 ; CHECK-DAG: li [[REG2:[0-9]+]], 16
27 ; CHECK-DAG: lvsl [[REG3:[0-9]+]], 0, 3
28 ; CHECK-DAG: lvx [[REG4:[0-9]+]], 3, [[REG1]]
29 ; CHECK-DAG: lvx [[REG5:[0-9]+]], 3, [[REG2]]
30 ; CHECK-DAG: lvx [[REG6:[0-9]+]], 0, 3
31 ; CHECK-DAG: vperm 3, {{[0-9]+}}, {{[0-9]+}}, [[REG3]]
32 ; CHECK-DAG: vperm 2, {{[0-9]+}}, {{[0-9]+}}, [[REG3]]
; pwr7 (attribute group #0): load of <8 x i16> with align 2.
; Expected: two lvx loads (index 0 and an index register holding 15) and an
; lvsl result, combined by a vperm that writes register 2.
36 define <8 x i16> @test_l_v8i16(<8 x i16>* %p) #0 {
38 %r = load <8 x i16>, <8 x i16>* %p, align 2
41 ; CHECK-LABEL: @test_l_v8i16
42 ; CHECK-DAG: li [[REG1:[0-9]+]], 15
43 ; CHECK-DAG: lvsl [[REG2:[0-9]+]], 0, 3
44 ; CHECK-DAG: lvx [[REG3:[0-9]+]], 3, [[REG1]]
45 ; CHECK-DAG: lvx [[REG4:[0-9]+]], 0, 3
46 ; CHECK: vperm 2, [[REG4]], [[REG3]], [[REG2]]
; pwr7 (attribute group #0): load of <16 x i16> with align 2.
; Expected: three lvx loads (index 0, and index registers holding 16 and 31)
; and one lvsl result shared by two vperm instructions that write registers
; 3 and 2.
50 define <16 x i16> @test_l_v16i16(<16 x i16>* %p) #0 {
52 %r = load <16 x i16>, <16 x i16>* %p, align 2
55 ; CHECK-LABEL: @test_l_v16i16
56 ; CHECK-DAG: li [[REG1:[0-9]+]], 31
57 ; CHECK-DAG: li [[REG2:[0-9]+]], 16
58 ; CHECK-DAG: lvsl [[REG3:[0-9]+]], 0, 3
59 ; CHECK-DAG: lvx [[REG4:[0-9]+]], 3, [[REG1]]
60 ; CHECK-DAG: lvx [[REG5:[0-9]+]], 3, [[REG2]]
61 ; CHECK-DAG: lvx [[REG6:[0-9]+]], 0, 3
62 ; CHECK-DAG: vperm 3, {{[0-9]+}}, {{[0-9]+}}, [[REG3]]
63 ; CHECK-DAG: vperm 2, {{[0-9]+}}, {{[0-9]+}}, [[REG3]]
; pwr7 (attribute group #0): load of <4 x i32> with align 4.
; Expected: two lvx loads (index 0 and an index register holding 15) and an
; lvsl result, combined by a vperm that writes register 2.
67 define <4 x i32> @test_l_v4i32(<4 x i32>* %p) #0 {
69 %r = load <4 x i32>, <4 x i32>* %p, align 4
72 ; CHECK-LABEL: @test_l_v4i32
73 ; CHECK-DAG: li [[REG1:[0-9]+]], 15
74 ; CHECK-DAG: lvsl [[REG2:[0-9]+]], 0, 3
75 ; CHECK-DAG: lvx [[REG3:[0-9]+]], 3, [[REG1]]
76 ; CHECK-DAG: lvx [[REG4:[0-9]+]], 0, 3
77 ; CHECK: vperm 2, [[REG4]], [[REG3]], [[REG2]]
; pwr7 (attribute group #0): load of <8 x i32> with align 4.
; Expected: three lvx loads (index 0, and index registers holding 16 and 31)
; and one lvsl result shared by two vperm instructions that write registers
; 3 and 2.
81 define <8 x i32> @test_l_v8i32(<8 x i32>* %p) #0 {
83 %r = load <8 x i32>, <8 x i32>* %p, align 4
86 ; CHECK-LABEL: @test_l_v8i32
87 ; CHECK-DAG: li [[REG1:[0-9]+]], 31
88 ; CHECK-DAG: li [[REG2:[0-9]+]], 16
89 ; CHECK-DAG: lvsl [[REG3:[0-9]+]], 0, 3
90 ; CHECK-DAG: lvx [[REG4:[0-9]+]], 3, [[REG1]]
91 ; CHECK-DAG: lvx [[REG5:[0-9]+]], 3, [[REG2]]
92 ; CHECK-DAG: lvx [[REG6:[0-9]+]], 0, 3
93 ; CHECK-DAG: vperm 3, {{[0-9]+}}, {{[0-9]+}}, [[REG3]]
94 ; CHECK-DAG: vperm 2, {{[0-9]+}}, {{[0-9]+}}, [[REG3]]
; pwr7 (attribute group #0): load of <2 x i64> with align 8.
; Expected: a single lxvd2x into register 34; no lvx/vperm sequence is
; required by the patterns here.
98 define <2 x i64> @test_l_v2i64(<2 x i64>* %p) #0 {
100 %r = load <2 x i64>, <2 x i64>* %p, align 8
103 ; CHECK-LABEL: @test_l_v2i64
104 ; CHECK: lxvd2x 34, 0, 3
; pwr7 (attribute group #0): load of <4 x i64> with align 8.
; Expected: two lxvd2x loads into registers 34 and 35, the second indexed by
; a register holding 16.
108 define <4 x i64> @test_l_v4i64(<4 x i64>* %p) #0 {
110 %r = load <4 x i64>, <4 x i64>* %p, align 8
113 ; CHECK-LABEL: @test_l_v4i64
114 ; CHECK-DAG: li [[REG1:[0-9]+]], 16
115 ; CHECK-DAG: lxvd2x 34, 0, 3
116 ; CHECK-DAG: lxvd2x 35, 3, [[REG1]]
; pwr7 (attribute group #0): load of <4 x float> with align 4.
; Expected: two lvx loads (index 0 and an index register holding 15) and an
; lvsl result, combined by a vperm that writes register 2.
120 define <4 x float> @test_l_v4float(<4 x float>* %p) #0 {
122 %r = load <4 x float>, <4 x float>* %p, align 4
125 ; CHECK-LABEL: @test_l_v4float
126 ; CHECK-DAG: li [[REG1:[0-9]+]], 15
127 ; CHECK-DAG: lvsl [[REG2:[0-9]+]], 0, 3
128 ; CHECK-DAG: lvx [[REG3:[0-9]+]], 3, [[REG1]]
129 ; CHECK-DAG: lvx [[REG4:[0-9]+]], 0, 3
130 ; CHECK: vperm 2, [[REG4]], [[REG3]], [[REG2]]
; pwr7 (attribute group #0): load of <8 x float> with align 4.
; Expected: three lvx loads (index 0, and index registers holding 16 and 31)
; and one lvsl result shared by two vperm instructions that write registers
; 3 and 2.
134 define <8 x float> @test_l_v8float(<8 x float>* %p) #0 {
136 %r = load <8 x float>, <8 x float>* %p, align 4
139 ; CHECK-LABEL: @test_l_v8float
140 ; CHECK-DAG: li [[REG1:[0-9]+]], 31
141 ; CHECK-DAG: li [[REG2:[0-9]+]], 16
142 ; CHECK-DAG: lvsl [[REG3:[0-9]+]], 0, 3
143 ; CHECK-DAG: lvx [[REG4:[0-9]+]], 3, [[REG1]]
144 ; CHECK-DAG: lvx [[REG5:[0-9]+]], 3, [[REG2]]
145 ; CHECK-DAG: lvx [[REG6:[0-9]+]], 0, 3
146 ; CHECK-DAG: vperm 3, {{[0-9]+}}, {{[0-9]+}}, [[REG3]]
147 ; CHECK-DAG: vperm 2, {{[0-9]+}}, {{[0-9]+}}, [[REG3]]
; pwr7 (attribute group #0): load of <2 x double> with align 8.
; Expected: a single lxvd2x into register 34.
151 define <2 x double> @test_l_v2double(<2 x double>* %p) #0 {
153 %r = load <2 x double>, <2 x double>* %p, align 8
156 ; CHECK-LABEL: @test_l_v2double
157 ; CHECK: lxvd2x 34, 0, 3
; pwr7 (attribute group #0): load of <4 x double> with align 8.
; Expected: two lxvd2x loads into registers 34 and 35, the second indexed by
; a register holding 16.
161 define <4 x double> @test_l_v4double(<4 x double>* %p) #0 {
163 %r = load <4 x double>, <4 x double>* %p, align 8
166 ; CHECK-LABEL: @test_l_v4double
167 ; CHECK-DAG: li [[REG1:[0-9]+]], 16
168 ; CHECK-DAG: lxvd2x 34, 0, 3
169 ; CHECK-DAG: lxvd2x 35, 3, [[REG1]]
; pwr8 (attribute group #2): load of <16 x i8> with align 1.
; Expected: a single lxvw4x into register 34, in contrast to the lvx/vperm
; sequence expected for the same load on pwr7 (test_l_v16i8).
173 define <16 x i8> @test_l_p8v16i8(<16 x i8>* %p) #2 {
175 %r = load <16 x i8>, <16 x i8>* %p, align 1
178 ; CHECK-LABEL: @test_l_p8v16i8
179 ; CHECK: lxvw4x 34, 0, 3
; pwr8 (attribute group #2): load of <32 x i8> with align 1.
; Expected: two lxvw4x loads into registers 34 and 35, the second indexed by
; a register holding 16.
183 define <32 x i8> @test_l_p8v32i8(<32 x i8>* %p) #2 {
185 %r = load <32 x i8>, <32 x i8>* %p, align 1
188 ; CHECK-LABEL: @test_l_p8v32i8
189 ; CHECK-DAG: li [[REG1:[0-9]+]], 16
190 ; CHECK-DAG: lxvw4x 34, 0, 3
191 ; CHECK-DAG: lxvw4x 35, 3, [[REG1]]
; pwr8 (attribute group #2): load of <8 x i16> with align 2.
; Expected: a single lxvw4x into register 34.
195 define <8 x i16> @test_l_p8v8i16(<8 x i16>* %p) #2 {
197 %r = load <8 x i16>, <8 x i16>* %p, align 2
200 ; CHECK-LABEL: @test_l_p8v8i16
201 ; CHECK: lxvw4x 34, 0, 3
; pwr8 (attribute group #2): load of <16 x i16> with align 2.
; Expected: two lxvw4x loads into registers 34 and 35, the second indexed by
; a register holding 16.
205 define <16 x i16> @test_l_p8v16i16(<16 x i16>* %p) #2 {
207 %r = load <16 x i16>, <16 x i16>* %p, align 2
210 ; CHECK-LABEL: @test_l_p8v16i16
211 ; CHECK-DAG: li [[REG1:[0-9]+]], 16
212 ; CHECK-DAG: lxvw4x 34, 0, 3
213 ; CHECK-DAG: lxvw4x 35, 3, [[REG1]]
; pwr8 (attribute group #2): load of <4 x i32> with align 4.
; Expected: a single lxvw4x into register 34.
217 define <4 x i32> @test_l_p8v4i32(<4 x i32>* %p) #2 {
219 %r = load <4 x i32>, <4 x i32>* %p, align 4
222 ; CHECK-LABEL: @test_l_p8v4i32
223 ; CHECK: lxvw4x 34, 0, 3
; pwr8 (attribute group #2): load of <8 x i32> with align 4.
; Expected: two lxvw4x loads into registers 34 and 35, the second indexed by
; a register holding 16.
227 define <8 x i32> @test_l_p8v8i32(<8 x i32>* %p) #2 {
229 %r = load <8 x i32>, <8 x i32>* %p, align 4
232 ; CHECK-LABEL: @test_l_p8v8i32
233 ; CHECK-DAG: li [[REG1:[0-9]+]], 16
234 ; CHECK-DAG: lxvw4x 34, 0, 3
235 ; CHECK-DAG: lxvw4x 35, 3, [[REG1]]
; pwr8 (attribute group #2): load of <2 x i64> with align 8.
; Expected: a single lxvd2x into register 34.
239 define <2 x i64> @test_l_p8v2i64(<2 x i64>* %p) #2 {
241 %r = load <2 x i64>, <2 x i64>* %p, align 8
244 ; CHECK-LABEL: @test_l_p8v2i64
245 ; CHECK: lxvd2x 34, 0, 3
; pwr8 (attribute group #2): load of <4 x i64> with align 8.
; Expected: two lxvd2x loads into registers 34 and 35, the second indexed by
; a register holding 16.
249 define <4 x i64> @test_l_p8v4i64(<4 x i64>* %p) #2 {
251 %r = load <4 x i64>, <4 x i64>* %p, align 8
254 ; CHECK-LABEL: @test_l_p8v4i64
255 ; CHECK-DAG: li [[REG1:[0-9]+]], 16
256 ; CHECK-DAG: lxvd2x 34, 0, 3
257 ; CHECK-DAG: lxvd2x 35, 3, [[REG1]]
; pwr8 (attribute group #2): load of <4 x float> with align 4.
; Expected: a single lxvw4x into register 34.
261 define <4 x float> @test_l_p8v4float(<4 x float>* %p) #2 {
263 %r = load <4 x float>, <4 x float>* %p, align 4
266 ; CHECK-LABEL: @test_l_p8v4float
267 ; CHECK: lxvw4x 34, 0, 3
; pwr8 (attribute group #2): load of <8 x float> with align 4.
; Expected: two lxvw4x loads into registers 34 and 35, the second indexed by
; a register holding 16.
271 define <8 x float> @test_l_p8v8float(<8 x float>* %p) #2 {
273 %r = load <8 x float>, <8 x float>* %p, align 4
276 ; CHECK-LABEL: @test_l_p8v8float
277 ; CHECK-DAG: li [[REG1:[0-9]+]], 16
278 ; CHECK-DAG: lxvw4x 34, 0, 3
279 ; CHECK-DAG: lxvw4x 35, 3, [[REG1]]
; pwr8 (attribute group #2): load of <2 x double> with align 8.
; Expected: a single lxvd2x into register 34.
283 define <2 x double> @test_l_p8v2double(<2 x double>* %p) #2 {
285 %r = load <2 x double>, <2 x double>* %p, align 8
288 ; CHECK-LABEL: @test_l_p8v2double
289 ; CHECK: lxvd2x 34, 0, 3
; pwr8 (attribute group #2): load of <4 x double> with align 8.
; Expected: two lxvd2x loads into registers 34 and 35, the second indexed by
; a register holding 16.
293 define <4 x double> @test_l_p8v4double(<4 x double>* %p) #2 {
295 %r = load <4 x double>, <4 x double>* %p, align 8
298 ; CHECK-LABEL: @test_l_p8v4double
299 ; CHECK-DAG: li [[REG1:[0-9]+]], 16
300 ; CHECK-DAG: lxvd2x 34, 0, 3
301 ; CHECK-DAG: lxvd2x 35, 3, [[REG1]]
; a2q (attribute group #1): load of <4 x float> with align 4.
; Expected: two qvlfsx loads (index 0 and an index register holding 15) and
; a qvlpclsx result in register 0, combined by a qvfperm that writes
; register 1.
305 define <4 x float> @test_l_qv4float(<4 x float>* %p) #1 {
307 %r = load <4 x float>, <4 x float>* %p, align 4
310 ; CHECK-LABEL: @test_l_qv4float
311 ; CHECK-DAG: li [[REG1:[0-9]+]], 15
312 ; CHECK-DAG: qvlpclsx 0, 0, 3
313 ; CHECK-DAG: qvlfsx [[REG2:[0-9]+]], 3, [[REG1]]
314 ; CHECK-DAG: qvlfsx [[REG3:[0-9]+]], 0, 3
315 ; CHECK: qvfperm 1, [[REG3]], [[REG2]], 0
; a2q (attribute group #1): load of <8 x float> with align 4.
; Expected: three qvlfsx loads (index 0, and index registers holding 16 and
; 31) and one qvlpclsx result shared by two qvfperm instructions that write
; registers 2 and 1.
; The qvlpclsx destination is captured with [0-9]+, like every other register
; capture in this file, so the match does not depend on which register number
; the allocator happens to pick.
319 define <8 x float> @test_l_qv8float(<8 x float>* %p) #1 {
321 %r = load <8 x float>, <8 x float>* %p, align 4
324 ; CHECK-LABEL: @test_l_qv8float
325 ; CHECK-DAG: li [[REG1:[0-9]+]], 31
326 ; CHECK-DAG: li [[REG2:[0-9]+]], 16
327 ; CHECK-DAG: qvlfsx [[REG3:[0-9]+]], 3, [[REG1]]
328 ; CHECK-DAG: qvlfsx [[REG4:[0-9]+]], 3, [[REG2]]
329 ; CHECK-DAG: qvlpclsx [[REG5:[0-9]+]], 0, 3
330 ; CHECK-DAG: qvlfsx [[REG6:[0-9]+]], 0, 3
331 ; CHECK-DAG: qvfperm 2, {{[0-9]+}}, {{[0-9]+}}, [[REG5]]
332 ; CHECK-DAG: qvfperm 1, {{[0-9]+}}, {{[0-9]+}}, [[REG5]]
; a2q (attribute group #1): load of <4 x double> with align 8.
; Expected: two qvlfdx loads (index 0 and an index register holding 31) and
; a qvlpcldx result in register 0, combined by a qvfperm that writes
; register 1.
336 define <4 x double> @test_l_qv4double(<4 x double>* %p) #1 {
338 %r = load <4 x double>, <4 x double>* %p, align 8
341 ; CHECK-LABEL: @test_l_qv4double
342 ; CHECK-DAG: li [[REG1:[0-9]+]], 31
343 ; CHECK-DAG: qvlpcldx 0, 0, 3
344 ; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], 3, [[REG1]]
345 ; CHECK-DAG: qvlfdx [[REG3:[0-9]+]], 0, 3
346 ; CHECK: qvfperm 1, [[REG3]], [[REG2]], 0
; a2q (attribute group #1): load of <8 x double> with align 8.
; Expected: three qvlfdx loads (index 0, and index registers holding 32 and
; 63) and one qvlpcldx result shared by two qvfperm instructions that write
; registers 2 and 1.
; The qvlpcldx destination is captured with [0-9]+, like every other register
; capture in this file, so the match does not depend on which register number
; the allocator happens to pick.
350 define <8 x double> @test_l_qv8double(<8 x double>* %p) #1 {
352 %r = load <8 x double>, <8 x double>* %p, align 8
355 ; CHECK-LABEL: @test_l_qv8double
356 ; CHECK-DAG: li [[REG1:[0-9]+]], 63
357 ; CHECK-DAG: li [[REG2:[0-9]+]], 32
358 ; CHECK-DAG: qvlfdx [[REG3:[0-9]+]], 3, [[REG1]]
359 ; CHECK-DAG: qvlfdx [[REG4:[0-9]+]], 3, [[REG2]]
360 ; CHECK-DAG: qvlpcldx [[REG5:[0-9]+]], 0, 3
361 ; CHECK-DAG: qvlfdx [[REG6:[0-9]+]], 0, 3
362 ; CHECK-DAG: qvfperm 2, {{[0-9]+}}, {{[0-9]+}}, [[REG5]]
363 ; CHECK-DAG: qvfperm 1, {{[0-9]+}}, {{[0-9]+}}, [[REG5]]
; pwr7 (attribute group #0): store of <16 x i8> with align 1.
; Expected: a single stxvw4x of register 34.
367 define void @test_s_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 {
369 store <16 x i8> %v, <16 x i8>* %p, align 1
372 ; CHECK-LABEL: @test_s_v16i8
373 ; CHECK: stxvw4x 34, 0, 3
; pwr7 (attribute group #0): store of <32 x i8> with align 1.
; Expected: two stxvw4x stores, register 34 at index 0 and register 35
; indexed by a register holding 16.
377 define void @test_s_v32i8(<32 x i8>* %p, <32 x i8> %v) #0 {
379 store <32 x i8> %v, <32 x i8>* %p, align 1
382 ; CHECK-LABEL: @test_s_v32i8
383 ; CHECK-DAG: li [[REG1:[0-9]+]], 16
384 ; CHECK-DAG: stxvw4x 35, 3, [[REG1]]
385 ; CHECK-DAG: stxvw4x 34, 0, 3
; pwr7 (attribute group #0): store of <8 x i16> with align 2.
; Expected: a single stxvw4x of register 34.
389 define void @test_s_v8i16(<8 x i16>* %p, <8 x i16> %v) #0 {
391 store <8 x i16> %v, <8 x i16>* %p, align 2
394 ; CHECK-LABEL: @test_s_v8i16
395 ; CHECK: stxvw4x 34, 0, 3
; pwr7 (attribute group #0): store of <16 x i16> with align 2.
; Expected: two stxvw4x stores, register 34 at index 0 and register 35
; indexed by a register holding 16.
399 define void @test_s_v16i16(<16 x i16>* %p, <16 x i16> %v) #0 {
401 store <16 x i16> %v, <16 x i16>* %p, align 2
404 ; CHECK-LABEL: @test_s_v16i16
405 ; CHECK-DAG: li [[REG1:[0-9]+]], 16
406 ; CHECK-DAG: stxvw4x 35, 3, [[REG1]]
407 ; CHECK-DAG: stxvw4x 34, 0, 3
; pwr7 (attribute group #0): store of <4 x i32> with align 4.
; Expected: a single stxvw4x of register 34.
411 define void @test_s_v4i32(<4 x i32>* %p, <4 x i32> %v) #0 {
413 store <4 x i32> %v, <4 x i32>* %p, align 4
416 ; CHECK-LABEL: @test_s_v4i32
417 ; CHECK: stxvw4x 34, 0, 3
; pwr7 (attribute group #0): store of <8 x i32> with align 4.
; Expected: two stxvw4x stores, register 34 at index 0 and register 35
; indexed by a register holding 16.
421 define void @test_s_v8i32(<8 x i32>* %p, <8 x i32> %v) #0 {
423 store <8 x i32> %v, <8 x i32>* %p, align 4
426 ; CHECK-LABEL: @test_s_v8i32
427 ; CHECK-DAG: li [[REG1:[0-9]+]], 16
428 ; CHECK-DAG: stxvw4x 35, 3, [[REG1]]
429 ; CHECK-DAG: stxvw4x 34, 0, 3
; pwr7 (attribute group #0): store of <2 x i64> with align 8.
; Expected: a single stxvd2x of register 34.
433 define void @test_s_v2i64(<2 x i64>* %p, <2 x i64> %v) #0 {
435 store <2 x i64> %v, <2 x i64>* %p, align 8
438 ; CHECK-LABEL: @test_s_v2i64
439 ; CHECK: stxvd2x 34, 0, 3
; pwr7 (attribute group #0): store of <4 x i64> with align 8.
; Expected: two stxvd2x stores, register 34 at index 0 and register 35
; indexed by a register holding 16.
443 define void @test_s_v4i64(<4 x i64>* %p, <4 x i64> %v) #0 {
445 store <4 x i64> %v, <4 x i64>* %p, align 8
448 ; CHECK-LABEL: @test_s_v4i64
449 ; CHECK-DAG: li [[REG1:[0-9]+]], 16
450 ; CHECK-DAG: stxvd2x 35, 3, [[REG1]]
451 ; CHECK-DAG: stxvd2x 34, 0, 3
; pwr7 (attribute group #0): store of <4 x float> with align 4.
; Expected: a single stxvw4x of register 34.
455 define void @test_s_v4float(<4 x float>* %p, <4 x float> %v) #0 {
457 store <4 x float> %v, <4 x float>* %p, align 4
460 ; CHECK-LABEL: @test_s_v4float
461 ; CHECK: stxvw4x 34, 0, 3
; pwr7 (attribute group #0): store of <8 x float> with align 4.
; Expected: two stxvw4x stores, register 34 at index 0 and register 35
; indexed by a register holding 16.
465 define void @test_s_v8float(<8 x float>* %p, <8 x float> %v) #0 {
467 store <8 x float> %v, <8 x float>* %p, align 4
470 ; CHECK-LABEL: @test_s_v8float
471 ; CHECK-DAG: li [[REG1:[0-9]+]], 16
472 ; CHECK-DAG: stxvw4x 35, 3, [[REG1]]
473 ; CHECK-DAG: stxvw4x 34, 0, 3
; pwr7 (attribute group #0): store of <2 x double> with align 8.
; Expected: a single stxvd2x of register 34.
477 define void @test_s_v2double(<2 x double>* %p, <2 x double> %v) #0 {
479 store <2 x double> %v, <2 x double>* %p, align 8
482 ; CHECK-LABEL: @test_s_v2double
483 ; CHECK: stxvd2x 34, 0, 3
; pwr7 (attribute group #0): store of <4 x double> with align 8.
; Expected: two stxvd2x stores, register 34 at index 0 and register 35
; indexed by a register holding 16.
487 define void @test_s_v4double(<4 x double>* %p, <4 x double> %v) #0 {
489 store <4 x double> %v, <4 x double>* %p, align 8
492 ; CHECK-LABEL: @test_s_v4double
493 ; CHECK-DAG: li [[REG1:[0-9]+]], 16
494 ; CHECK-DAG: stxvd2x 35, 3, [[REG1]]
495 ; CHECK-DAG: stxvd2x 34, 0, 3
; a2q (attribute group #1): store of <4 x float> with align 4.
; Expected: an element-by-element store. Register 1 is stored with stfs at
; offset 0; three qvesplati results taken from register 1 (element indices
; 1, 2, 3) are stored with stfs at offsets 4, 8 and 12.
499 define void @test_s_qv4float(<4 x float>* %p, <4 x float> %v) #1 {
501 store <4 x float> %v, <4 x float>* %p, align 4
504 ; CHECK-LABEL: @test_s_qv4float
505 ; CHECK-DAG: qvesplati [[REG1:[0-9]+]], 1, 3
506 ; CHECK-DAG: qvesplati [[REG2:[0-9]+]], 1, 2
507 ; CHECK-DAG: qvesplati [[REG3:[0-9]+]], 1, 1
508 ; CHECK-DAG: stfs 1, 0(3)
509 ; CHECK-DAG: stfs [[REG1]], 12(3)
510 ; CHECK-DAG: stfs [[REG2]], 8(3)
511 ; CHECK-DAG: stfs [[REG3]], 4(3)
; a2q (attribute group #1): store of <8 x float> with align 4.
; Expected: an element-by-element store of two source registers. Register 1
; covers offsets 0-12 and register 2 covers offsets 16-28. Each register is
; stored directly at its first offset, and three qvesplati results (element
; indices 1, 2, 3) are stored with stfs at the following 4-byte steps.
515 define void @test_s_qv8float(<8 x float>* %p, <8 x float> %v) #1 {
517 store <8 x float> %v, <8 x float>* %p, align 4
520 ; CHECK-LABEL: @test_s_qv8float
521 ; CHECK-DAG: qvesplati [[REG1:[0-9]+]], 2, 3
522 ; CHECK-DAG: qvesplati [[REG2:[0-9]+]], 2, 2
523 ; CHECK-DAG: qvesplati [[REG3:[0-9]+]], 2, 1
524 ; CHECK-DAG: qvesplati [[REG4:[0-9]+]], 1, 3
525 ; CHECK-DAG: qvesplati [[REG5:[0-9]+]], 1, 2
526 ; CHECK-DAG: qvesplati [[REG6:[0-9]+]], 1, 1
527 ; CHECK-DAG: stfs 2, 16(3)
528 ; CHECK-DAG: stfs 1, 0(3)
529 ; CHECK-DAG: stfs [[REG1]], 28(3)
530 ; CHECK-DAG: stfs [[REG2]], 24(3)
531 ; CHECK-DAG: stfs [[REG3]], 20(3)
532 ; CHECK-DAG: stfs [[REG4]], 12(3)
533 ; CHECK-DAG: stfs [[REG5]], 8(3)
534 ; CHECK-DAG: stfs [[REG6]], 4(3)
; a2q (attribute group #1): store of <4 x double> with align 8.
; Expected: an element-by-element store. Register 1 is stored with stfd at
; offset 0; three qvesplati results taken from register 1 (element indices
; 1, 2, 3) are stored with stfd at offsets 8, 16 and 24.
538 define void @test_s_qv4double(<4 x double>* %p, <4 x double> %v) #1 {
540 store <4 x double> %v, <4 x double>* %p, align 8
543 ; CHECK-LABEL: @test_s_qv4double
544 ; CHECK-DAG: qvesplati [[REG1:[0-9]+]], 1, 3
545 ; CHECK-DAG: qvesplati [[REG2:[0-9]+]], 1, 2
546 ; CHECK-DAG: qvesplati [[REG3:[0-9]+]], 1, 1
547 ; CHECK-DAG: stfd 1, 0(3)
548 ; CHECK-DAG: stfd [[REG1]], 24(3)
549 ; CHECK-DAG: stfd [[REG2]], 16(3)
550 ; CHECK-DAG: stfd [[REG3]], 8(3)
; a2q (attribute group #1): store of <8 x double> with align 8.
; Expected: an element-by-element store of two source registers. Register 1
; covers offsets 0-24 and register 2 covers offsets 32-56. Each register is
; stored directly at its first offset, and three qvesplati results (element
; indices 1, 2, 3) are stored with stfd at the following 8-byte steps.
554 define void @test_s_qv8double(<8 x double>* %p, <8 x double> %v) #1 {
556 store <8 x double> %v, <8 x double>* %p, align 8
559 ; CHECK-LABEL: @test_s_qv8double
560 ; CHECK-DAG: qvesplati [[REG1:[0-9]+]], 2, 3
561 ; CHECK-DAG: qvesplati [[REG2:[0-9]+]], 2, 2
562 ; CHECK-DAG: qvesplati [[REG3:[0-9]+]], 2, 1
563 ; CHECK-DAG: qvesplati [[REG4:[0-9]+]], 1, 3
564 ; CHECK-DAG: qvesplati [[REG5:[0-9]+]], 1, 2
565 ; CHECK-DAG: qvesplati [[REG6:[0-9]+]], 1, 1
566 ; CHECK-DAG: stfd 2, 32(3)
567 ; CHECK-DAG: stfd 1, 0(3)
568 ; CHECK-DAG: stfd [[REG1]], 56(3)
569 ; CHECK-DAG: stfd [[REG2]], 48(3)
570 ; CHECK-DAG: stfd [[REG3]], 40(3)
571 ; CHECK-DAG: stfd [[REG4]], 24(3)
572 ; CHECK-DAG: stfd [[REG5]], 16(3)
573 ; CHECK-DAG: stfd [[REG6]], 8(3)
; Attribute groups that select the target CPU for each test function:
;   #0 = pwr7 (test_l_v* and test_s_v* functions)
;   #1 = a2q  (test_l_qv* and test_s_qv* functions)
;   #2 = pwr8 (test_l_p8v* functions)
577 attributes #0 = { nounwind "target-cpu"="pwr7" }
578 attributes #1 = { nounwind "target-cpu"="a2q" }
579 attributes #2 = { nounwind "target-cpu"="pwr8" }