1 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s
3 ; These tests verify that VSX swap optimization works for various
4 ; manipulations of <2 x double> vectors.
; Test operands: two 16-byte-aligned <2 x double> globals that the functions
; below load from and store to.
6 @x = global <2 x double> <double 9.970000e+01, double -1.032220e+02>, align 16
7 @z = global <2 x double> <double 2.332000e+01, double 3.111111e+01>, align 16
; bar0: load the <2 x double> held in @x, replace element 0 with the scalar
; argument %y (insertelement), and store the resulting vector to @z.
9 define void @bar0(double %y) {
11 %0 = load <2 x double>, <2 x double>* @x, align 16
12 %vecins = insertelement <2 x double> %0, double %y, i32 0
13 store <2 x double> %vecins, <2 x double>* @z, align 16
; Expected output for bar0:
;  - an lxvd2x line and an xxspltd line, matched in either order; the first
;    register operand of each is captured as REG1 and REG2 respectively;
;  - then an xxpermdi whose operands after its first register (REG3) are
;    REG2, REG1 and the immediate 1;
;  - then an stxvd2x whose first register operand is REG3.
18 ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
19 ; CHECK-DAG: xxspltd [[REG2:[0-9]+]]
20 ; CHECK: xxpermdi [[REG3:[0-9]+]], [[REG2]], [[REG1]], 1
21 ; CHECK: stxvd2x [[REG3]]
; bar1: same shape as bar0, but the scalar argument %y replaces element 1 of
; the vector loaded from @x before the result is stored to @z.
24 define void @bar1(double %y) {
26 %0 = load <2 x double>, <2 x double>* @x, align 16
27 %vecins = insertelement <2 x double> %0, double %y, i32 1
28 store <2 x double> %vecins, <2 x double>* @z, align 16
; Expected output for bar1:
;  - an lxvd2x line and an xxspltd line, matched in either order; the first
;    register operand of each is captured as REG1 and REG2 respectively;
;  - then an xxmrghd whose operands after its first register (REG3) are
;    REG1 then REG2 (note: the opposite order from bar0's xxpermdi);
;  - then an stxvd2x whose first register operand is REG3.
33 ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
34 ; CHECK-DAG: xxspltd [[REG2:[0-9]+]]
35 ; CHECK: xxmrghd [[REG3:[0-9]+]], [[REG1]], [[REG2]]
36 ; CHECK: stxvd2x [[REG3]]
41 %0 = load <2 x double>, <2 x double>* @z, align 16
42 %1 = load <2 x double>, <2 x double>* @x, align 16
43 %vecins = shufflevector <2 x double> %0, <2 x double> %1, <2 x i32> <i32 0, i32 2>
44 store <2 x double> %vecins, <2 x double>* @z, align 16
57 %0 = load <2 x double>, <2 x double>* @z, align 16
58 %1 = load <2 x double>, <2 x double>* @x, align 16
59 %vecins = shufflevector <2 x double> %0, <2 x double> %1, <2 x i32> <i32 3, i32 1>
60 store <2 x double> %vecins, <2 x double>* @z, align 16