1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -slp-vectorizer -S -mtriple=aarch64-apple-ios -mcpu=cyclone -o - %s | FileCheck %s
; f1: straight-line case. Lanes 0 and 1 of %x are extracted and stored to the
; four consecutive i16 slots %ptr0..%ptr3 in the order t2, t3, t3, t2, with one
; additional store of lane 0 through %a. The assertions below expect a single
; shufflevector with mask <0, 1, 1, 0> feeding one <4 x i16> store, while the
; store through %a stays scalar (fed by an extractelement of the shuffle).
4 define void @f1(<2 x i16> %x, i16* %a) {
6 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[X:%.*]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
7 ; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
8 ; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
9 ; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
10 ; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
11 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[SHUFFLE]], i32 0
12 ; CHECK-NEXT: store i16 [[TMP1]], i16* [[A:%.*]]
13 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>*
14 ; CHECK-NEXT: store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP2]], align 2
15 ; CHECK-NEXT: ret void
; Scalar input: %t2 is lane 0 of %x, %t3 is lane 1.
17 %t2 = extractelement <2 x i16> %x, i32 0
18 %t3 = extractelement <2 x i16> %x, i32 1
; Addresses of elements 0..3 of a [4 x i16] array based at undef.
19 %ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
20 %ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
21 %ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
22 %ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
; One store through %a, then the four array slots receive t2, t3, t3, t2.
23 store i16 %t2, i16* %a
24 store i16 %t2, i16* %ptr0
25 store i16 %t3, i16* %ptr1
26 store i16 %t3, i16* %ptr2
27 store i16 %t2, i16* %ptr3
; f2: same store pattern as a straight-line t2, t3, t3, t2 sequence, but placed
; in block %cont, which branches back to itself. The source vector is the phi
; %xx (%x from %entry, undef from %cont). The assertions below expect the
; shufflevector with mask <0, 1, 1, 0> and a single <4 x i16> store inside the
; block, followed by the scalar load / icmp / conditional branch.
31 define void @f2(<2 x i16> %x, i16* %a) {
34 ; CHECK-NEXT: br label [[CONT:%.*]]
36 ; CHECK-NEXT: [[XX:%.*]] = phi <2 x i16> [ [[X:%.*]], [[ENTRY:%.*]] ], [ undef, [[CONT]] ]
37 ; CHECK-NEXT: [[AA:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ]
38 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
39 ; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
40 ; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
41 ; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
42 ; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
43 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[SHUFFLE]], i32 0
44 ; CHECK-NEXT: store i16 [[TMP0]], i16* [[A]]
45 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>*
46 ; CHECK-NEXT: store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP1]], align 2
47 ; CHECK-NEXT: [[A_VAL:%.*]] = load i16, i16* [[A]], align 2
48 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[A_VAL]], 0
49 ; CHECK-NEXT: br i1 [[CMP]], label [[CONT]], label [[EXIT:%.*]]
51 ; CHECK-NEXT: ret void
56 cont: ; preds = %entry, %cont
; Phi inputs: the function arguments on entry, undef on the back edge.
; NOTE(review): %aa is not referenced by the stores or load visible below,
; which use %a directly.
57 %xx = phi <2 x i16> [ %x, %entry ], [ undef, %cont ]
58 %aa = phi i16* [ %a, %entry ], [ undef, %cont ]
; %t2 is lane 0 of %xx, %t3 is lane 1.
59 %t2 = extractelement <2 x i16> %xx, i32 0
60 %t3 = extractelement <2 x i16> %xx, i32 1
; Addresses of elements 0..3 of a [4 x i16] array based at undef.
61 %ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
62 %ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
63 %ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
64 %ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
; One store through %a, then the four array slots receive t2, t3, t3, t2.
65 store i16 %t2, i16* %a
66 store i16 %t2, i16* %ptr0
67 store i16 %t3, i16* %ptr1
68 store i16 %t3, i16* %ptr2
69 store i16 %t2, i16* %ptr3
; Reload through %a and loop back to %cont while the loaded value equals 0.
70 %a_val = load i16, i16* %a, align 2
71 %cmp = icmp eq i16 %a_val, 0
72 br i1 %cmp, label %cont, label %exit
78 define void @f3(<2 x i16> %x, i16* %a) {
81 ; CHECK-NEXT: br label [[CONT:%.*]]
83 ; CHECK-NEXT: [[XX:%.*]] = phi <2 x i16> [ [[X:%.*]], [[ENTRY:%.*]] ], [ undef, [[CONT]] ]
84 ; CHECK-NEXT: [[AA:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ]
85 ; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> undef, <2 x i32> <i32 1, i32 0>
86 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[REORDER_SHUFFLE]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
87 ; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
88 ; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
89 ; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
90 ; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
91 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[SHUFFLE]], i32 0
92 ; CHECK-NEXT: store i16 [[TMP0]], i16* [[A]]
93 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>*
94 ; CHECK-NEXT: store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP1]], align 2
95 ; CHECK-NEXT: [[A_VAL:%.*]] = load i16, i16* [[A]], align 2
96 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[A_VAL]], 0
97 ; CHECK-NEXT: br i1 [[CMP]], label [[CONT]], label [[EXIT:%.*]]
99 ; CHECK-NEXT: ret void
104 cont: ; preds = %entry, %cont
105 %xx = phi <2 x i16> [ %x, %entry ], [ undef, %cont ]
106 %aa = phi i16* [ %a, %entry ], [ undef, %cont ]
107 %t2 = extractelement <2 x i16> %xx, i32 0
108 %t3 = extractelement <2 x i16> %xx, i32 1
109 %ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
110 %ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
111 %ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
112 %ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
113 store i16 %t3, i16* %a
114 store i16 %t3, i16* %ptr0
115 store i16 %t2, i16* %ptr1
116 store i16 %t2, i16* %ptr2
117 store i16 %t3, i16* %ptr3
118 %a_val = load i16, i16* %a, align 2
119 %cmp = icmp eq i16 %a_val, 0
120 br i1 %cmp, label %cont, label %exit
122 exit: ; preds = %cont