1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2 ; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
3 ; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3
4 ; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
5 ; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
6 ; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
8 ; Check that we recognize the tree starting at the extractelement as a
10 ; NOTE: We're only really interested in the extractelement cost, which represents the entire reduction.
12 define fastcc float @reduction_cost_float(<4 x float> %rdx) {
13 ; SSE2-LABEL: 'reduction_cost_float'
14 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
17 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
18 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0
19 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
21 ; SSSE3-LABEL: 'reduction_cost_float'
22 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
23 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
25 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
26 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0
27 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
29 ; SSE42-LABEL: 'reduction_cost_float'
30 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
31 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
32 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
33 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
34 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0
35 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
37 ; AVX-LABEL: 'reduction_cost_float'
38 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
39 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
40 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
41 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
42 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0
43 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
45 %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
46 %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
47 %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
48 %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
50 %r = extractelement <4 x float> %bin.rdx8, i32 0
54 define fastcc i32 @reduction_cost_int(<8 x i32> %rdx) {
55 ; SSE-LABEL: 'reduction_cost_int'
56 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
57 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <8 x i32> %rdx, %rdx.shuf
58 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
59 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.2 = add <8 x i32> %bin.rdx, %rdx.shuf.2
60 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.3 = shufflevector <8 x i32> %bin.rdx.2, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
61 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3
62 ; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <8 x i32> %bin.rdx.3, i32 0
63 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
65 ; AVX1-LABEL: 'reduction_cost_int'
66 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
67 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = add <8 x i32> %rdx, %rdx.shuf
68 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
69 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx.2 = add <8 x i32> %bin.rdx, %rdx.shuf.2
70 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.3 = shufflevector <8 x i32> %bin.rdx.2, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
71 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3
72 ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <8 x i32> %bin.rdx.3, i32 0
73 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
75 ; AVX2-LABEL: 'reduction_cost_int'
76 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
77 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i32> %rdx, %rdx.shuf
78 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
79 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.2 = add <8 x i32> %bin.rdx, %rdx.shuf.2
80 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.3 = shufflevector <8 x i32> %bin.rdx.2, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
81 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3
82 ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <8 x i32> %bin.rdx.3, i32 0
83 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
85 %rdx.shuf = shufflevector <8 x i32> %rdx, <8 x i32> undef,
86 <8 x i32> <i32 4 , i32 5, i32 6, i32 7,
87 i32 undef, i32 undef, i32 undef, i32 undef>
88 %bin.rdx = add <8 x i32> %rdx, %rdx.shuf
89 %rdx.shuf.2 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef,
90 <8 x i32> <i32 2 , i32 3, i32 undef, i32 undef,
91 i32 undef, i32 undef, i32 undef, i32 undef>
92 %bin.rdx.2 = add <8 x i32> %bin.rdx, %rdx.shuf.2
93 %rdx.shuf.3 = shufflevector <8 x i32> %bin.rdx.2, <8 x i32> undef,
94 <8 x i32> <i32 1 , i32 undef, i32 undef, i32 undef,
95 i32 undef, i32 undef, i32 undef, i32 undef>
96 %bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3
98 %r = extractelement <8 x i32> %bin.rdx.3, i32 0
102 define fastcc float @pairwise_hadd(<4 x float> %rdx, float %f1) {
103 ; SSE2-LABEL: 'pairwise_hadd'
104 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
105 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
106 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
107 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
108 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
109 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
110 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
111 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1
112 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2
114 ; SSSE3-LABEL: 'pairwise_hadd'
115 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
116 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
117 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
118 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
119 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
120 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
121 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
122 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1
123 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2
125 ; SSE42-LABEL: 'pairwise_hadd'
126 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
127 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
128 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
129 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
130 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
131 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
132 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
133 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = fadd float %r, %f1
134 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2
136 ; AVX-LABEL: 'pairwise_hadd'
137 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
138 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
139 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
140 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
141 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
142 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
143 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
144 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = fadd float %r, %f1
145 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2
147 %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
148 <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
149 %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
150 <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
151 %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
152 %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
153 <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
154 %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
155 <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
156 %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
158 %r = extractelement <4 x float> %bin.rdx.1, i32 0
159 %r2 = fadd float %r, %f1
163 define fastcc float @pairwise_hadd_assoc(<4 x float> %rdx, float %f1) {
164 ; SSE2-LABEL: 'pairwise_hadd_assoc'
165 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
166 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
167 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.1, %rdx.shuf.0.0
168 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
169 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
170 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
171 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
172 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1
173 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2
175 ; SSSE3-LABEL: 'pairwise_hadd_assoc'
176 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
177 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
178 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.1, %rdx.shuf.0.0
179 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
180 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
181 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
182 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
183 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1
184 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2
186 ; SSE42-LABEL: 'pairwise_hadd_assoc'
187 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
188 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
189 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.1, %rdx.shuf.0.0
190 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
191 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
192 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
193 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
194 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = fadd float %r, %f1
195 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2
197 ; AVX-LABEL: 'pairwise_hadd_assoc'
198 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
199 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
200 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.1, %rdx.shuf.0.0
201 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
202 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
203 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
204 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
205 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = fadd float %r, %f1
206 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2
208 %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
209 <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
210 %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
211 <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
212 %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.1, %rdx.shuf.0.0
213 %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
214 <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
215 %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
216 <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
217 %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
219 %r = extractelement <4 x float> %bin.rdx.1, i32 0
220 %r2 = fadd float %r, %f1
224 define fastcc float @pairwise_hadd_skip_first(<4 x float> %rdx, float %f1) {
225 ; SSE2-LABEL: 'pairwise_hadd_skip_first'
226 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
227 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
228 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
229 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
230 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %bin.rdx.0, %rdx.shuf.1.1
231 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
232 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1
233 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2
235 ; SSSE3-LABEL: 'pairwise_hadd_skip_first'
236 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
237 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
238 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
239 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
240 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %bin.rdx.0, %rdx.shuf.1.1
241 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
242 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1
243 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2
245 ; SSE42-LABEL: 'pairwise_hadd_skip_first'
246 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
247 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
248 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
249 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
250 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.1 = fadd <4 x float> %bin.rdx.0, %rdx.shuf.1.1
251 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
252 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = fadd float %r, %f1
253 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2
255 ; AVX-LABEL: 'pairwise_hadd_skip_first'
256 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
257 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
258 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
259 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
260 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.1 = fadd <4 x float> %bin.rdx.0, %rdx.shuf.1.1
261 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
262 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = fadd float %r, %f1
263 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2
265 %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
266 <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
267 %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
268 <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
269 %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
270 %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
271 <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
272 %bin.rdx.1 = fadd <4 x float> %bin.rdx.0, %rdx.shuf.1.1
274 %r = extractelement <4 x float> %bin.rdx.1, i32 0
275 %r2 = fadd float %r, %f1
279 define fastcc double @no_pairwise_reduction2double(<2 x double> %rdx, double %f1) {
280 ; SSE2-LABEL: 'no_pairwise_reduction2double'
281 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
282 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <2 x double> %rdx, %rdx.shuf
283 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x double> %bin.rdx, i32 0
284 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
286 ; SSSE3-LABEL: 'no_pairwise_reduction2double'
287 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
288 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <2 x double> %rdx, %rdx.shuf
289 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x double> %bin.rdx, i32 0
290 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
292 ; SSE42-LABEL: 'no_pairwise_reduction2double'
293 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
294 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <2 x double> %rdx, %rdx.shuf
295 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x double> %bin.rdx, i32 0
296 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
298 ; AVX-LABEL: 'no_pairwise_reduction2double'
299 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
300 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <2 x double> %rdx, %rdx.shuf
301 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x double> %bin.rdx, i32 0
302 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
304 %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
305 %bin.rdx = fadd <2 x double> %rdx, %rdx.shuf
307 %r = extractelement <2 x double> %bin.rdx, i32 0
311 define fastcc float @no_pairwise_reduction4float(<4 x float> %rdx, float %f1) {
312 ; SSE2-LABEL: 'no_pairwise_reduction4float'
313 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
314 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
315 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
316 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
317 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0
318 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
320 ; SSSE3-LABEL: 'no_pairwise_reduction4float'
321 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
322 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
323 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
324 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
325 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0
326 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
328 ; SSE42-LABEL: 'no_pairwise_reduction4float'
329 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
330 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
331 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
332 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
333 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0
334 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
336 ; AVX-LABEL: 'no_pairwise_reduction4float'
337 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
338 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
339 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
340 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
341 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0
342 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
344 %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
345 %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
346 %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
347 %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
349 %r = extractelement <4 x float> %bin.rdx8, i32 0
353 define fastcc double @no_pairwise_reduction4double(<4 x double> %rdx, double %f1) {
354 ; SSE2-LABEL: 'no_pairwise_reduction4double'
355 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
356 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf
357 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
358 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7
359 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0
360 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
362 ; SSSE3-LABEL: 'no_pairwise_reduction4double'
363 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
364 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf
365 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
366 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7
367 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0
368 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
370 ; SSE42-LABEL: 'no_pairwise_reduction4double'
371 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
372 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf
373 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
374 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7
375 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0
376 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
378 ; AVX1-LABEL: 'no_pairwise_reduction4double'
379 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
380 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf
381 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
382 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7
383 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0
384 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
386 ; AVX2-LABEL: 'no_pairwise_reduction4double'
387 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
388 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf
389 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
390 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7
391 ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0
392 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
394 %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
395 %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf
396 %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
397 %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7
399 %r = extractelement <4 x double> %bin.rdx8, i32 0
403 define fastcc float @no_pairwise_reduction8float(<8 x float> %rdx, float %f1) {
404 ; SSE2-LABEL: 'no_pairwise_reduction8float'
405 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
406 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx4 = fadd <8 x float> %rdx, %rdx.shuf3
407 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf = shufflevector <8 x float> %bin.rdx4, <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
408 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <8 x float> %bin.rdx4, %rdx.shuf
409 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
410 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7
411 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <8 x float> %bin.rdx8, i32 0
412 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
414 ; SSSE3-LABEL: 'no_pairwise_reduction8float'
415 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
416 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx4 = fadd <8 x float> %rdx, %rdx.shuf3
417 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf = shufflevector <8 x float> %bin.rdx4, <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
418 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <8 x float> %bin.rdx4, %rdx.shuf
419 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
420 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7
421 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <8 x float> %bin.rdx8, i32 0
422 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
424 ; SSE42-LABEL: 'no_pairwise_reduction8float'
425 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
426 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx4 = fadd <8 x float> %rdx, %rdx.shuf3
427 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf = shufflevector <8 x float> %bin.rdx4, <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
428 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <8 x float> %bin.rdx4, %rdx.shuf
429 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
430 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7
431 ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <8 x float> %bin.rdx8, i32 0
432 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
434 ; AVX1-LABEL: 'no_pairwise_reduction8float'
435 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
436 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx4 = fadd <8 x float> %rdx, %rdx.shuf3
437 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf = shufflevector <8 x float> %bin.rdx4, <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
438 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <8 x float> %bin.rdx4, %rdx.shuf
439 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
440 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7
441 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <8 x float> %bin.rdx8, i32 0
442 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
444 ; AVX2-LABEL: 'no_pairwise_reduction8float'
445 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
446 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx4 = fadd <8 x float> %rdx, %rdx.shuf3
447 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x float> %bin.rdx4, <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
448 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <8 x float> %bin.rdx4, %rdx.shuf
449 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
450 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7
451 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <8 x float> %bin.rdx8, i32 0
452 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
454 %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7,i32 undef, i32 undef, i32 undef, i32 undef>
455 %bin.rdx4 = fadd <8 x float> %rdx, %rdx.shuf3
456 %rdx.shuf = shufflevector <8 x float> %bin.rdx4, <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
457 %bin.rdx = fadd <8 x float> %bin.rdx4, %rdx.shuf
458 %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
459 %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7
461 %r = extractelement <8 x float> %bin.rdx8, i32 0
465 define fastcc i64 @no_pairwise_reduction2i64(<2 x i64> %rdx, i64 %f1) {
466 ; SSE-LABEL: 'no_pairwise_reduction2i64'
467 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
468 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <2 x i64> %rdx, %rdx.shuf
469 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x i64> %bin.rdx, i32 0
470 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
472 ; AVX-LABEL: 'no_pairwise_reduction2i64'
473 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
474 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <2 x i64> %rdx, %rdx.shuf
475 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <2 x i64> %bin.rdx, i32 0
476 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
478 %rdx.shuf = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
479 %bin.rdx = add <2 x i64> %rdx, %rdx.shuf
481 %r = extractelement <2 x i64> %bin.rdx, i32 0
485 define fastcc i32 @no_pairwise_reduction4i32(<4 x i32> %rdx, i32 %f1) {
486 ; CHECK-LABEL: 'no_pairwise_reduction4i32'
487 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
488 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i32> %rdx, %rdx.shuf
489 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
490 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %bin.rdx, %rdx.shuf7
491 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0
492 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
494 %rdx.shuf = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
495 %bin.rdx = add <4 x i32> %rdx, %rdx.shuf
496 %rdx.shuf7 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
497 %bin.rdx8 = add <4 x i32> %bin.rdx, %rdx.shuf7
499 %r = extractelement <4 x i32> %bin.rdx8, i32 0
503 define fastcc i64 @no_pairwise_reduction4i64(<4 x i64> %rdx, i64 %f1) {
504 ; SSE-LABEL: 'no_pairwise_reduction4i64'
505 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
506 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <4 x i64> %rdx, %rdx.shuf
507 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
508 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <4 x i64> %bin.rdx, %rdx.shuf7
509 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0
510 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
512 ; AVX1-LABEL: 'no_pairwise_reduction4i64'
513 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
514 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = add <4 x i64> %rdx, %rdx.shuf
515 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
516 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = add <4 x i64> %bin.rdx, %rdx.shuf7
517 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0
518 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
520 ; AVX2-LABEL: 'no_pairwise_reduction4i64'
521 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
522 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i64> %rdx, %rdx.shuf
523 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
524 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i64> %bin.rdx, %rdx.shuf7
525 ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0
526 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
528 %rdx.shuf = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
529 %bin.rdx = add <4 x i64> %rdx, %rdx.shuf
530 %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
531 %bin.rdx8 = add <4 x i64> %bin.rdx, %rdx.shuf7
533 %r = extractelement <4 x i64> %bin.rdx8, i32 0
537 define fastcc i16 @no_pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) {
538 ; SSE2-LABEL: 'no_pairwise_reduction8i16'
539 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
540 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx4 = add <8 x i16> %rdx, %rdx.shuf3
541 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
542 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf
543 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
544 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7
545 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <8 x i16> %bin.rdx8, i32 0
546 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
548 ; SSSE3-LABEL: 'no_pairwise_reduction8i16'
549 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
550 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx4 = add <8 x i16> %rdx, %rdx.shuf3
551 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
552 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf
553 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
554 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7
555 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <8 x i16> %bin.rdx8, i32 0
556 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
558 ; SSE42-LABEL: 'no_pairwise_reduction8i16'
559 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
560 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx4 = add <8 x i16> %rdx, %rdx.shuf3
561 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
562 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf
563 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
564 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7
565 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <8 x i16> %bin.rdx8, i32 0
566 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
568 ; AVX-LABEL: 'no_pairwise_reduction8i16'
569 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
570 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx4 = add <8 x i16> %rdx, %rdx.shuf3
571 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
572 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf
573 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
574 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7
575 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <8 x i16> %bin.rdx8, i32 0
576 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
578 %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7,i32 undef, i32 undef, i32 undef, i32 undef>
579 %bin.rdx4 = add <8 x i16> %rdx, %rdx.shuf3
580 %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
581 %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf
582 %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
583 %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7
585 %r = extractelement <8 x i16> %bin.rdx8, i32 0
589 define fastcc i32 @no_pairwise_reduction8i32(<8 x i32> %rdx, i32 %f1) {
590 ; SSE-LABEL: 'no_pairwise_reduction8i32'
591 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf3 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
592 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx4 = add <8 x i32> %rdx, %rdx.shuf3
593 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf = shufflevector <8 x i32> %bin.rdx4, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
594 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <8 x i32> %bin.rdx4, %rdx.shuf
595 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf7 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
596 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <8 x i32> %bin.rdx, %rdx.shuf7
597 ; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <8 x i32> %bin.rdx8, i32 0
598 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
600 ; AVX1-LABEL: 'no_pairwise_reduction8i32'
601 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf3 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
602 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx4 = add <8 x i32> %rdx, %rdx.shuf3
603 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf = shufflevector <8 x i32> %bin.rdx4, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
604 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = add <8 x i32> %bin.rdx4, %rdx.shuf
605 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf7 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
606 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = add <8 x i32> %bin.rdx, %rdx.shuf7
607 ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <8 x i32> %bin.rdx8, i32 0
608 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
610 ; AVX2-LABEL: 'no_pairwise_reduction8i32'
611 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
612 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx4 = add <8 x i32> %rdx, %rdx.shuf3
613 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i32> %bin.rdx4, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
614 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i32> %bin.rdx4, %rdx.shuf
615 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
616 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i32> %bin.rdx, %rdx.shuf7
617 ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <8 x i32> %bin.rdx8, i32 0
618 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
620 %rdx.shuf3 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7,i32 undef, i32 undef, i32 undef, i32 undef>
621 %bin.rdx4 = add <8 x i32> %rdx, %rdx.shuf3
622 %rdx.shuf = shufflevector <8 x i32> %bin.rdx4, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
623 %bin.rdx = add <8 x i32> %bin.rdx4, %rdx.shuf
624 %rdx.shuf7 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
625 %bin.rdx8 = add <8 x i32> %bin.rdx, %rdx.shuf7
627 %r = extractelement <8 x i32> %bin.rdx8, i32 0
631 define fastcc double @pairwise_reduction2double(<2 x double> %rdx, double %f1) {
632 ; SSE2-LABEL: 'pairwise_reduction2double'
633 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
634 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
635 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <2 x double> %rdx.shuf.1.0, %rdx.shuf.1.1
636 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0
637 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
639 ; SSSE3-LABEL: 'pairwise_reduction2double'
640 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
641 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
642 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <2 x double> %rdx.shuf.1.0, %rdx.shuf.1.1
643 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0
644 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
646 ; SSE42-LABEL: 'pairwise_reduction2double'
647 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
648 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
649 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <2 x double> %rdx.shuf.1.0, %rdx.shuf.1.1
650 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0
651 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
653 ; AVX-LABEL: 'pairwise_reduction2double'
654 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
655 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
656 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <2 x double> %rdx.shuf.1.0, %rdx.shuf.1.1
657 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0
658 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
660 %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
661 %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
662 %bin.rdx8 = fadd <2 x double> %rdx.shuf.1.0, %rdx.shuf.1.1
664 %r = extractelement <2 x double> %bin.rdx8, i32 0
668 define fastcc float @pairwise_reduction4float(<4 x float> %rdx, float %f1) {
669 ; SSE2-LABEL: 'pairwise_reduction4float'
670 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
671 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
672 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
673 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
674 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
675 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
676 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0
677 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
679 ; SSSE3-LABEL: 'pairwise_reduction4float'
680 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
681 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
682 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
683 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
684 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
685 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
686 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0
687 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
689 ; SSE42-LABEL: 'pairwise_reduction4float'
690 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
691 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
692 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
693 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
694 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
695 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
696 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0
697 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
699 ; AVX-LABEL: 'pairwise_reduction4float'
700 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
701 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
702 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
703 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
704 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
705 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
706 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0
707 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
709 %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
710 %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
711 %bin.rdx = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
712 %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
713 %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
714 %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
716 %r = extractelement <4 x float> %bin.rdx8, i32 0
720 define fastcc double @pairwise_reduction4double(<4 x double> %rdx, double %f1) {
721 ; SSE2-LABEL: 'pairwise_reduction4double'
722 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
723 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
724 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <4 x double> %rdx.shuf.0.0, %rdx.shuf.0.1
725 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
726 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
727 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1
728 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0
729 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
731 ; SSSE3-LABEL: 'pairwise_reduction4double'
732 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
733 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
734 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <4 x double> %rdx.shuf.0.0, %rdx.shuf.0.1
735 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
736 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
737 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1
738 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0
739 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
741 ; SSE42-LABEL: 'pairwise_reduction4double'
742 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
743 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
744 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x double> %rdx.shuf.0.0, %rdx.shuf.0.1
745 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
746 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
747 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1
748 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0
749 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
751 ; AVX1-LABEL: 'pairwise_reduction4double'
752 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
753 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
754 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x double> %rdx.shuf.0.0, %rdx.shuf.0.1
755 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
756 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
757 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1
758 ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0
759 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
761 ; AVX2-LABEL: 'pairwise_reduction4double'
762 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
763 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
764 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x double> %rdx.shuf.0.0, %rdx.shuf.0.1
765 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
766 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
767 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1
768 ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0
769 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
771 %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
772 %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
773 %bin.rdx = fadd <4 x double> %rdx.shuf.0.0, %rdx.shuf.0.1
774 %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
775 %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
776 %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1
778 %r = extractelement <4 x double> %bin.rdx8, i32 0
782 define fastcc float @pairwise_reduction8float(<8 x float> %rdx, float %f1) {
783 ; SSE2-LABEL: 'pairwise_reduction8float'
784 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
785 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
786 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <8 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
787 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
788 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
789 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <8 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
790 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
791 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
792 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1
793 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0
794 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
796 ; SSSE3-LABEL: 'pairwise_reduction8float'
797 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
798 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
799 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <8 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
800 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
801 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
802 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <8 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
803 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
804 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
805 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1
806 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0
807 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
809 ; SSE42-LABEL: 'pairwise_reduction8float'
810 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
811 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
812 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <8 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
813 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
814 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
815 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <8 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
816 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
817 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
818 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1
819 ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0
820 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
822 ; AVX1-LABEL: 'pairwise_reduction8float'
823 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
824 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
825 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <8 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
826 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
827 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
828 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <8 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
829 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
830 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
831 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1
832 ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0
833 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
835 ; AVX2-LABEL: 'pairwise_reduction8float'
836 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
837 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
838 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <8 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
839 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
840 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
841 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <8 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
842 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
843 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
844 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1
845 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0
846 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
848 %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6,i32 undef, i32 undef, i32 undef, i32 undef>
849 %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7,i32 undef, i32 undef, i32 undef, i32 undef>
850 %bin.rdx = fadd <8 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
851 %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef,<8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
852 %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef,<8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
853 %bin.rdx8 = fadd <8 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
854 %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef,<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
855 %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef,<8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
856 %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1
858 %r = extractelement <8 x float> %bin.rdx9, i32 0
862 define fastcc i64 @pairwise_reduction2i64(<2 x i64> %rdx, i64 %f1) {
863 ; SSE-LABEL: 'pairwise_reduction2i64'
864 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 0, i32 undef>
865 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
866 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <2 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1
867 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = extractelement <2 x i64> %bin.rdx8, i32 0
868 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
870 ; AVX-LABEL: 'pairwise_reduction2i64'
871 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 0, i32 undef>
872 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
873 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <2 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1
874 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <2 x i64> %bin.rdx8, i32 0
875 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
877 %rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 0, i32 undef>
878 %rdx.shuf.1.1 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
879 %bin.rdx8 = add <2 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1
881 %r = extractelement <2 x i64> %bin.rdx8, i32 0
885 define fastcc i32 @pairwise_reduction4i32(<4 x i32> %rdx, i32 %f1) {
886 ; CHECK-LABEL: 'pairwise_reduction4i32'
887 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
888 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
889 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1
890 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
891 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
892 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1
893 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0
894 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
896 %rdx.shuf.0.0 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
897 %rdx.shuf.0.1 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
898 %bin.rdx = add <4 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1
899 %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
900 %rdx.shuf.1.1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
901 %bin.rdx8 = add <4 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1
903 %r = extractelement <4 x i32> %bin.rdx8, i32 0
907 define fastcc i64 @pairwise_reduction4i64(<4 x i64> %rdx, i64 %f1) {
908 ; SSE-LABEL: 'pairwise_reduction4i64'
909 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
910 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
911 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <4 x i64> %rdx.shuf.0.0, %rdx.shuf.0.1
912 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
913 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
914 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1
915 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0
916 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
918 ; AVX1-LABEL: 'pairwise_reduction4i64'
919 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
920 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
921 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = add <4 x i64> %rdx.shuf.0.0, %rdx.shuf.0.1
922 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
923 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
924 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1
925 ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0
926 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
928 ; AVX2-LABEL: 'pairwise_reduction4i64'
929 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
930 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
931 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i64> %rdx.shuf.0.0, %rdx.shuf.0.1
932 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
933 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
934 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1
935 ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0
936 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
938 %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
939 %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
940 %bin.rdx = add <4 x i64> %rdx.shuf.0.0, %rdx.shuf.0.1
941 %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
942 %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
943 %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1
945 %r = extractelement <4 x i64> %bin.rdx8, i32 0
949 define fastcc i16 @pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) {
950 ; SSE2-LABEL: 'pairwise_reduction8i16'
951 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
952 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
953 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %rdx.shuf.0.0, %rdx.shuf.0.1
954 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
955 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
956 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %rdx.shuf.1.0, %rdx.shuf.1.1
957 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
958 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
959 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1
960 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0
961 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
963 ; SSSE3-LABEL: 'pairwise_reduction8i16'
964 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
965 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
966 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %rdx.shuf.0.0, %rdx.shuf.0.1
967 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
968 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
969 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %rdx.shuf.1.0, %rdx.shuf.1.1
970 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
971 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
972 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1
973 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0
974 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
976 ; SSE42-LABEL: 'pairwise_reduction8i16'
977 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
978 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
979 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %rdx.shuf.0.0, %rdx.shuf.0.1
980 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
981 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
982 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %rdx.shuf.1.0, %rdx.shuf.1.1
983 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
984 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
985 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1
986 ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0
987 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
989 ; AVX-LABEL: 'pairwise_reduction8i16'
990 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
991 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
992 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %rdx.shuf.0.0, %rdx.shuf.0.1
993 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
994 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
995 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %rdx.shuf.1.0, %rdx.shuf.1.1
996 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
997 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
998 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1
999 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0
1000 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
1002 %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6,i32 undef, i32 undef, i32 undef, i32 undef>
1003 %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7,i32 undef, i32 undef, i32 undef, i32 undef>
1004 %bin.rdx = add <8 x i16> %rdx.shuf.0.0, %rdx.shuf.0.1
1005 %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef,<8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1006 %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef,<8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1007 %bin.rdx8 = add <8 x i16> %rdx.shuf.1.0, %rdx.shuf.1.1
1008 %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef,<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1009 %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef,<8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1010 %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1
1012 %r = extractelement <8 x i16> %bin.rdx9, i32 0
1016 define fastcc i32 @pairwise_reduction8i32(<8 x i32> %rdx, i32 %f1) {
1017 ; SSE-LABEL: 'pairwise_reduction8i32'
1018 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
1019 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1020 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <8 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1
1021 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1022 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1023 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <8 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1
1024 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1025 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1026 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1
1027 ; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0
1028 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
1030 ; AVX1-LABEL: 'pairwise_reduction8i32'
1031 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
1032 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1033 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = add <8 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1
1034 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1035 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1036 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = add <8 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1
1037 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1038 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1039 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1
1040 ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0
1041 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
1043 ; AVX2-LABEL: 'pairwise_reduction8i32'
1044 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
1045 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1046 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1
1047 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1048 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1049 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1
1050 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1051 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1052 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1
1053 ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0
1054 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
1056 %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6,i32 undef, i32 undef, i32 undef, i32 undef>
1057 %rdx.shuf.0.1 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7,i32 undef, i32 undef, i32 undef, i32 undef>
1058 %bin.rdx = add <8 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1
1059 %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef,<8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1060 %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef,<8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1061 %bin.rdx8 = add <8 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1
1062 %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef,<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1063 %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef,<8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1064 %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1
1066 %r = extractelement <8 x i32> %bin.rdx9, i32 0