1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -reassociate -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx -mattr=+avx2 | FileCheck %s
4 define signext i8 @Foo(<32 x i8>* %__v) {
7 ; CHECK-NEXT: [[TMP0:%.*]] = load <32 x i8>, <32 x i8>* [[__V:%.*]], align 32
8 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i8> [[TMP0]], <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
9 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <32 x i8> [[TMP0]], [[RDX_SHUF]]
10 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x i8> [[BIN_RDX]], <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
11 ; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <32 x i8> [[BIN_RDX]], [[RDX_SHUF1]]
12 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <32 x i8> [[BIN_RDX2]], <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
13 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <32 x i8> [[BIN_RDX2]], [[RDX_SHUF3]]
14 ; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <32 x i8> [[BIN_RDX4]], <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
15 ; CHECK-NEXT: [[BIN_RDX6:%.*]] = add <32 x i8> [[BIN_RDX4]], [[RDX_SHUF5]]
16 ; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x i8> [[BIN_RDX6]], <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
17 ; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <32 x i8> [[BIN_RDX6]], [[RDX_SHUF7]]
18 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <32 x i8> [[BIN_RDX8]], i32 0
19 ; CHECK-NEXT: ret i8 [[TMP1]]
22 %0 = load <32 x i8>, <32 x i8>* %__v, align 32
23 %vecext.i.i.i = extractelement <32 x i8> %0, i64 0
24 %vecext.i.i.1.i = extractelement <32 x i8> %0, i64 1
25 %add.i.1.i = add i8 %vecext.i.i.1.i, %vecext.i.i.i
26 %vecext.i.i.2.i = extractelement <32 x i8> %0, i64 2
27 %add.i.2.i = add i8 %vecext.i.i.2.i, %add.i.1.i
28 %vecext.i.i.3.i = extractelement <32 x i8> %0, i64 3
29 %add.i.3.i = add i8 %vecext.i.i.3.i, %add.i.2.i
30 %vecext.i.i.4.i = extractelement <32 x i8> %0, i64 4
31 %add.i.4.i = add i8 %vecext.i.i.4.i, %add.i.3.i
32 %vecext.i.i.5.i = extractelement <32 x i8> %0, i64 5
33 %add.i.5.i = add i8 %vecext.i.i.5.i, %add.i.4.i
34 %vecext.i.i.6.i = extractelement <32 x i8> %0, i64 6
35 %add.i.6.i = add i8 %vecext.i.i.6.i, %add.i.5.i
36 %vecext.i.i.7.i = extractelement <32 x i8> %0, i64 7
37 %add.i.7.i = add i8 %vecext.i.i.7.i, %add.i.6.i
38 %vecext.i.i.8.i = extractelement <32 x i8> %0, i64 8
39 %add.i.8.i = add i8 %vecext.i.i.8.i, %add.i.7.i
40 %vecext.i.i.9.i = extractelement <32 x i8> %0, i64 9
41 %add.i.9.i = add i8 %vecext.i.i.9.i, %add.i.8.i
42 %vecext.i.i.10.i = extractelement <32 x i8> %0, i64 10
43 %add.i.10.i = add i8 %vecext.i.i.10.i, %add.i.9.i
44 %vecext.i.i.11.i = extractelement <32 x i8> %0, i64 11
45 %add.i.11.i = add i8 %vecext.i.i.11.i, %add.i.10.i
46 %vecext.i.i.12.i = extractelement <32 x i8> %0, i64 12
47 %add.i.12.i = add i8 %vecext.i.i.12.i, %add.i.11.i
48 %vecext.i.i.13.i = extractelement <32 x i8> %0, i64 13
49 %add.i.13.i = add i8 %vecext.i.i.13.i, %add.i.12.i
50 %vecext.i.i.14.i = extractelement <32 x i8> %0, i64 14
51 %add.i.14.i = add i8 %vecext.i.i.14.i, %add.i.13.i
52 %vecext.i.i.15.i = extractelement <32 x i8> %0, i64 15
53 %add.i.15.i = add i8 %vecext.i.i.15.i, %add.i.14.i
54 %vecext.i.i.16.i = extractelement <32 x i8> %0, i64 16
55 %add.i.16.i = add i8 %vecext.i.i.16.i, %add.i.15.i
56 %vecext.i.i.17.i = extractelement <32 x i8> %0, i64 17
57 %add.i.17.i = add i8 %vecext.i.i.17.i, %add.i.16.i
58 %vecext.i.i.18.i = extractelement <32 x i8> %0, i64 18
59 %add.i.18.i = add i8 %vecext.i.i.18.i, %add.i.17.i
60 %vecext.i.i.19.i = extractelement <32 x i8> %0, i64 19
61 %add.i.19.i = add i8 %vecext.i.i.19.i, %add.i.18.i
62 %vecext.i.i.20.i = extractelement <32 x i8> %0, i64 20
63 %add.i.20.i = add i8 %vecext.i.i.20.i, %add.i.19.i
64 %vecext.i.i.21.i = extractelement <32 x i8> %0, i64 21
65 %add.i.21.i = add i8 %vecext.i.i.21.i, %add.i.20.i
66 %vecext.i.i.22.i = extractelement <32 x i8> %0, i64 22
67 %add.i.22.i = add i8 %vecext.i.i.22.i, %add.i.21.i
68 %vecext.i.i.23.i = extractelement <32 x i8> %0, i64 23
69 %add.i.23.i = add i8 %vecext.i.i.23.i, %add.i.22.i
70 %vecext.i.i.24.i = extractelement <32 x i8> %0, i64 24
71 %add.i.24.i = add i8 %vecext.i.i.24.i, %add.i.23.i
72 %vecext.i.i.25.i = extractelement <32 x i8> %0, i64 25
73 %add.i.25.i = add i8 %vecext.i.i.25.i, %add.i.24.i
74 %vecext.i.i.26.i = extractelement <32 x i8> %0, i64 26
75 %add.i.26.i = add i8 %vecext.i.i.26.i, %add.i.25.i
76 %vecext.i.i.27.i = extractelement <32 x i8> %0, i64 27
77 %add.i.27.i = add i8 %vecext.i.i.27.i, %add.i.26.i
78 %vecext.i.i.28.i = extractelement <32 x i8> %0, i64 28
79 %add.i.28.i = add i8 %vecext.i.i.28.i, %add.i.27.i
80 %vecext.i.i.29.i = extractelement <32 x i8> %0, i64 29
81 %add.i.29.i = add i8 %vecext.i.i.29.i, %add.i.28.i
82 %vecext.i.i.30.i = extractelement <32 x i8> %0, i64 30
83 %add.i.30.i = add i8 %vecext.i.i.30.i, %add.i.29.i
84 %vecext.i.i.31.i = extractelement <32 x i8> %0, i64 31
85 %add.i.31.i = add i8 %vecext.i.i.31.i, %add.i.30.i