1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2 ; RUN: llc < %s -mtriple=wasm32 -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefix=SIMD128
; Integer add reduction of <2 x i64> %arg via llvm.vector.reduce.add.
; The checks expect one i8x16.shuffle feeding an i64x2.add, followed by an
; extract of lane 0.
; The intrinsic name carries only the operand's vector-type suffix (.v2i64),
; matching the <2 x i64> argument and the naming used by the other calls in
; this file.
4 define i64 @pairwise_add_v2i64(<2 x i64> %arg) {
5 ; SIMD128-LABEL: pairwise_add_v2i64:
6 ; SIMD128: .functype pairwise_add_v2i64 (v128) -> (i64)
7 ; SIMD128-NEXT: # %bb.0:
8 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
9 ; SIMD128-NEXT: i64x2.add $push1=, $0, $pop0
10 ; SIMD128-NEXT: i64x2.extract_lane $push2=, $pop1, 0
11 ; SIMD128-NEXT: return $pop2
12 %res = tail call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %arg)
; Integer add reduction of <4 x i32> %arg via llvm.vector.reduce.add.
; The checks expect two i8x16.shuffle + i32x4.add steps, then an extract of
; lane 0.
16 define i32 @pairwise_add_v4i32(<4 x i32> %arg) {
17 ; SIMD128-LABEL: pairwise_add_v4i32:
18 ; SIMD128: .functype pairwise_add_v4i32 (v128) -> (i32)
19 ; SIMD128-NEXT: # %bb.0:
20 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
21 ; SIMD128-NEXT: i32x4.add $push5=, $0, $pop0
22 ; SIMD128-NEXT: local.tee $push4=, $0=, $pop5
23 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
24 ; SIMD128-NEXT: i32x4.add $push2=, $pop4, $pop1
25 ; SIMD128-NEXT: i32x4.extract_lane $push3=, $pop2, 0
26 ; SIMD128-NEXT: return $pop3
27 %res = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %arg)
; Integer add reduction of <8 x i16> %arg via llvm.vector.reduce.add.
; The checks expect three i8x16.shuffle + i16x8.add steps; the result is read
; with i16x8.extract_lane_u and the checked .functype returns i32.
31 define i16 @pairwise_add_v8i16(<8 x i16> %arg) {
32 ; SIMD128-LABEL: pairwise_add_v8i16:
33 ; SIMD128: .functype pairwise_add_v8i16 (v128) -> (i32)
34 ; SIMD128-NEXT: # %bb.0:
35 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
36 ; SIMD128-NEXT: i16x8.add $push8=, $0, $pop0
37 ; SIMD128-NEXT: local.tee $push7=, $0=, $pop8
38 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
39 ; SIMD128-NEXT: i16x8.add $push6=, $pop7, $pop1
40 ; SIMD128-NEXT: local.tee $push5=, $0=, $pop6
41 ; SIMD128-NEXT: i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
42 ; SIMD128-NEXT: i16x8.add $push3=, $pop5, $pop2
43 ; SIMD128-NEXT: i16x8.extract_lane_u $push4=, $pop3, 0
44 ; SIMD128-NEXT: return $pop4
45 %res = tail call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %arg)
; Integer add reduction of <16 x i8> %arg via llvm.vector.reduce.add.
; The checks expect four i8x16.shuffle + i8x16.add steps; the result is read
; with i8x16.extract_lane_u and the checked .functype returns i32.
49 define i8 @pairwise_add_v16i8(<16 x i8> %arg) {
50 ; SIMD128-LABEL: pairwise_add_v16i8:
51 ; SIMD128: .functype pairwise_add_v16i8 (v128) -> (i32)
52 ; SIMD128-NEXT: # %bb.0:
53 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
54 ; SIMD128-NEXT: i8x16.add $push11=, $0, $pop0
55 ; SIMD128-NEXT: local.tee $push10=, $0=, $pop11
56 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
57 ; SIMD128-NEXT: i8x16.add $push9=, $pop10, $pop1
58 ; SIMD128-NEXT: local.tee $push8=, $0=, $pop9
59 ; SIMD128-NEXT: i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
60 ; SIMD128-NEXT: i8x16.add $push7=, $pop8, $pop2
61 ; SIMD128-NEXT: local.tee $push6=, $0=, $pop7
62 ; SIMD128-NEXT: i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
63 ; SIMD128-NEXT: i8x16.add $push4=, $pop6, $pop3
64 ; SIMD128-NEXT: i8x16.extract_lane_u $push5=, $pop4, 0
65 ; SIMD128-NEXT: return $pop5
66 %res = tail call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %arg)
; Integer multiply reduction of <2 x i64> %arg via llvm.vector.reduce.mul.
; The checks expect one i8x16.shuffle feeding an i64x2.mul, then an extract of
; lane 0.
70 define i64 @pairwise_mul_v2i64(<2 x i64> %arg) {
71 ; SIMD128-LABEL: pairwise_mul_v2i64:
72 ; SIMD128: .functype pairwise_mul_v2i64 (v128) -> (i64)
73 ; SIMD128-NEXT: # %bb.0:
74 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
75 ; SIMD128-NEXT: i64x2.mul $push1=, $0, $pop0
76 ; SIMD128-NEXT: i64x2.extract_lane $push2=, $pop1, 0
77 ; SIMD128-NEXT: return $pop2
78 %res = tail call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %arg)
; Integer multiply reduction of <4 x i32> %arg via llvm.vector.reduce.mul.
; The checks expect two i8x16.shuffle + i32x4.mul steps, then an extract of
; lane 0.
82 define i32 @pairwise_mul_v4i32(<4 x i32> %arg) {
83 ; SIMD128-LABEL: pairwise_mul_v4i32:
84 ; SIMD128: .functype pairwise_mul_v4i32 (v128) -> (i32)
85 ; SIMD128-NEXT: # %bb.0:
86 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
87 ; SIMD128-NEXT: i32x4.mul $push5=, $0, $pop0
88 ; SIMD128-NEXT: local.tee $push4=, $0=, $pop5
89 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
90 ; SIMD128-NEXT: i32x4.mul $push2=, $pop4, $pop1
91 ; SIMD128-NEXT: i32x4.extract_lane $push3=, $pop2, 0
92 ; SIMD128-NEXT: return $pop3
93 %res = tail call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %arg)
; Integer multiply reduction of <8 x i16> %arg via llvm.vector.reduce.mul.
; The checks expect three i8x16.shuffle + i16x8.mul steps; the result is read
; with i16x8.extract_lane_u and the checked .functype returns i32.
97 define i16 @pairwise_mul_v8i16(<8 x i16> %arg) {
98 ; SIMD128-LABEL: pairwise_mul_v8i16:
99 ; SIMD128: .functype pairwise_mul_v8i16 (v128) -> (i32)
100 ; SIMD128-NEXT: # %bb.0:
101 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
102 ; SIMD128-NEXT: i16x8.mul $push8=, $0, $pop0
103 ; SIMD128-NEXT: local.tee $push7=, $0=, $pop8
104 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
105 ; SIMD128-NEXT: i16x8.mul $push6=, $pop7, $pop1
106 ; SIMD128-NEXT: local.tee $push5=, $0=, $pop6
107 ; SIMD128-NEXT: i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
108 ; SIMD128-NEXT: i16x8.mul $push3=, $pop5, $pop2
109 ; SIMD128-NEXT: i16x8.extract_lane_u $push4=, $pop3, 0
110 ; SIMD128-NEXT: return $pop4
111 %res = tail call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %arg)
; Integer multiply reduction of <16 x i8> %arg via llvm.vector.reduce.mul.
; Unlike the wider element types, the checks contain no vector multiply: after
; a single i8x16.shuffle, lanes 0-7 of the input and of the shuffled vector are
; read with i8x16.extract_lane_u and combined through a tree of scalar i32.mul.
115 define i8 @pairwise_mul_v16i8(<16 x i8> %arg) {
116 ; SIMD128-LABEL: pairwise_mul_v16i8:
117 ; SIMD128: .functype pairwise_mul_v16i8 (v128) -> (i32)
118 ; SIMD128-NEXT: # %bb.0:
119 ; SIMD128-NEXT: i8x16.extract_lane_u $push26=, $0, 0
120 ; SIMD128-NEXT: i8x16.shuffle $push32=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
121 ; SIMD128-NEXT: local.tee $push31=, $1=, $pop32
122 ; SIMD128-NEXT: i8x16.extract_lane_u $push25=, $pop31, 0
123 ; SIMD128-NEXT: i32.mul $push27=, $pop26, $pop25
124 ; SIMD128-NEXT: i8x16.extract_lane_u $push23=, $0, 4
125 ; SIMD128-NEXT: i8x16.extract_lane_u $push22=, $1, 4
126 ; SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22
127 ; SIMD128-NEXT: i32.mul $push28=, $pop27, $pop24
128 ; SIMD128-NEXT: i8x16.extract_lane_u $push19=, $0, 2
129 ; SIMD128-NEXT: i8x16.extract_lane_u $push18=, $1, 2
130 ; SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18
131 ; SIMD128-NEXT: i8x16.extract_lane_u $push16=, $0, 6
132 ; SIMD128-NEXT: i8x16.extract_lane_u $push15=, $1, 6
133 ; SIMD128-NEXT: i32.mul $push17=, $pop16, $pop15
134 ; SIMD128-NEXT: i32.mul $push21=, $pop20, $pop17
135 ; SIMD128-NEXT: i32.mul $push29=, $pop28, $pop21
136 ; SIMD128-NEXT: i8x16.extract_lane_u $push11=, $0, 1
137 ; SIMD128-NEXT: i8x16.extract_lane_u $push10=, $1, 1
138 ; SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10
139 ; SIMD128-NEXT: i8x16.extract_lane_u $push8=, $0, 5
140 ; SIMD128-NEXT: i8x16.extract_lane_u $push7=, $1, 5
141 ; SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7
142 ; SIMD128-NEXT: i32.mul $push13=, $pop12, $pop9
143 ; SIMD128-NEXT: i8x16.extract_lane_u $push4=, $0, 3
144 ; SIMD128-NEXT: i8x16.extract_lane_u $push3=, $1, 3
145 ; SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3
146 ; SIMD128-NEXT: i8x16.extract_lane_u $push1=, $0, 7
147 ; SIMD128-NEXT: i8x16.extract_lane_u $push0=, $1, 7
148 ; SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0
149 ; SIMD128-NEXT: i32.mul $push6=, $pop5, $pop2
150 ; SIMD128-NEXT: i32.mul $push14=, $pop13, $pop6
151 ; SIMD128-NEXT: i32.mul $push30=, $pop29, $pop14
152 ; SIMD128-NEXT: return $pop30
153 %res = tail call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %arg)
; Bitwise AND reduction of <2 x i64> %arg via llvm.vector.reduce.and.
; The checks expect one i8x16.shuffle feeding a v128.and, then an extract of
; lane 0.
157 define i64 @pairwise_and_v2i64(<2 x i64> %arg) {
158 ; SIMD128-LABEL: pairwise_and_v2i64:
159 ; SIMD128: .functype pairwise_and_v2i64 (v128) -> (i64)
160 ; SIMD128-NEXT: # %bb.0:
161 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
162 ; SIMD128-NEXT: v128.and $push1=, $0, $pop0
163 ; SIMD128-NEXT: i64x2.extract_lane $push2=, $pop1, 0
164 ; SIMD128-NEXT: return $pop2
165 %res = tail call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %arg)
; Bitwise AND reduction of <4 x i32> %arg via llvm.vector.reduce.and.
; The checks expect two i8x16.shuffle + v128.and steps, then an extract of
; lane 0.
169 define i32 @pairwise_and_v4i32(<4 x i32> %arg) {
170 ; SIMD128-LABEL: pairwise_and_v4i32:
171 ; SIMD128: .functype pairwise_and_v4i32 (v128) -> (i32)
172 ; SIMD128-NEXT: # %bb.0:
173 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
174 ; SIMD128-NEXT: v128.and $push5=, $0, $pop0
175 ; SIMD128-NEXT: local.tee $push4=, $0=, $pop5
176 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
177 ; SIMD128-NEXT: v128.and $push2=, $pop4, $pop1
178 ; SIMD128-NEXT: i32x4.extract_lane $push3=, $pop2, 0
179 ; SIMD128-NEXT: return $pop3
180 %res = tail call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %arg)
; Bitwise AND reduction of <8 x i16> %arg via llvm.vector.reduce.and.
; The checks expect three i8x16.shuffle + v128.and steps; the result is read
; with i16x8.extract_lane_u and the checked .functype returns i32.
184 define i16 @pairwise_and_v8i16(<8 x i16> %arg) {
185 ; SIMD128-LABEL: pairwise_and_v8i16:
186 ; SIMD128: .functype pairwise_and_v8i16 (v128) -> (i32)
187 ; SIMD128-NEXT: # %bb.0:
188 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
189 ; SIMD128-NEXT: v128.and $push8=, $0, $pop0
190 ; SIMD128-NEXT: local.tee $push7=, $0=, $pop8
191 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
192 ; SIMD128-NEXT: v128.and $push6=, $pop7, $pop1
193 ; SIMD128-NEXT: local.tee $push5=, $0=, $pop6
194 ; SIMD128-NEXT: i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
195 ; SIMD128-NEXT: v128.and $push3=, $pop5, $pop2
196 ; SIMD128-NEXT: i16x8.extract_lane_u $push4=, $pop3, 0
197 ; SIMD128-NEXT: return $pop4
198 %res = tail call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %arg)
; Bitwise AND reduction of <16 x i8> %arg via llvm.vector.reduce.and.
; The checks expect four i8x16.shuffle + v128.and steps; the result is read
; with i8x16.extract_lane_u and the checked .functype returns i32.
202 define i8 @pairwise_and_v16i8(<16 x i8> %arg) {
203 ; SIMD128-LABEL: pairwise_and_v16i8:
204 ; SIMD128: .functype pairwise_and_v16i8 (v128) -> (i32)
205 ; SIMD128-NEXT: # %bb.0:
206 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
207 ; SIMD128-NEXT: v128.and $push11=, $0, $pop0
208 ; SIMD128-NEXT: local.tee $push10=, $0=, $pop11
209 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
210 ; SIMD128-NEXT: v128.and $push9=, $pop10, $pop1
211 ; SIMD128-NEXT: local.tee $push8=, $0=, $pop9
212 ; SIMD128-NEXT: i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
213 ; SIMD128-NEXT: v128.and $push7=, $pop8, $pop2
214 ; SIMD128-NEXT: local.tee $push6=, $0=, $pop7
215 ; SIMD128-NEXT: i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
216 ; SIMD128-NEXT: v128.and $push4=, $pop6, $pop3
217 ; SIMD128-NEXT: i8x16.extract_lane_u $push5=, $pop4, 0
218 ; SIMD128-NEXT: return $pop5
219 %res = tail call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %arg)
; Bitwise OR reduction of <2 x i64> %arg via llvm.vector.reduce.or.
; The checks expect one i8x16.shuffle feeding a v128.or, then an extract of
; lane 0.
223 define i64 @pairwise_or_v2i64(<2 x i64> %arg) {
224 ; SIMD128-LABEL: pairwise_or_v2i64:
225 ; SIMD128: .functype pairwise_or_v2i64 (v128) -> (i64)
226 ; SIMD128-NEXT: # %bb.0:
227 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
228 ; SIMD128-NEXT: v128.or $push1=, $0, $pop0
229 ; SIMD128-NEXT: i64x2.extract_lane $push2=, $pop1, 0
230 ; SIMD128-NEXT: return $pop2
231 %res = tail call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %arg)
; Bitwise OR reduction of <4 x i32> %arg via llvm.vector.reduce.or.
; The checks expect two i8x16.shuffle + v128.or steps, then an extract of
; lane 0.
235 define i32 @pairwise_or_v4i32(<4 x i32> %arg) {
236 ; SIMD128-LABEL: pairwise_or_v4i32:
237 ; SIMD128: .functype pairwise_or_v4i32 (v128) -> (i32)
238 ; SIMD128-NEXT: # %bb.0:
239 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
240 ; SIMD128-NEXT: v128.or $push5=, $0, $pop0
241 ; SIMD128-NEXT: local.tee $push4=, $0=, $pop5
242 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
243 ; SIMD128-NEXT: v128.or $push2=, $pop4, $pop1
244 ; SIMD128-NEXT: i32x4.extract_lane $push3=, $pop2, 0
245 ; SIMD128-NEXT: return $pop3
246 %res = tail call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %arg)
; Bitwise OR reduction of <8 x i16> %arg via llvm.vector.reduce.or.
; The checks expect three i8x16.shuffle + v128.or steps; the result is read
; with i16x8.extract_lane_u and the checked .functype returns i32.
250 define i16 @pairwise_or_v8i16(<8 x i16> %arg) {
251 ; SIMD128-LABEL: pairwise_or_v8i16:
252 ; SIMD128: .functype pairwise_or_v8i16 (v128) -> (i32)
253 ; SIMD128-NEXT: # %bb.0:
254 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
255 ; SIMD128-NEXT: v128.or $push8=, $0, $pop0
256 ; SIMD128-NEXT: local.tee $push7=, $0=, $pop8
257 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
258 ; SIMD128-NEXT: v128.or $push6=, $pop7, $pop1
259 ; SIMD128-NEXT: local.tee $push5=, $0=, $pop6
260 ; SIMD128-NEXT: i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
261 ; SIMD128-NEXT: v128.or $push3=, $pop5, $pop2
262 ; SIMD128-NEXT: i16x8.extract_lane_u $push4=, $pop3, 0
263 ; SIMD128-NEXT: return $pop4
264 %res = tail call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %arg)
; Bitwise OR reduction of <16 x i8> %arg via llvm.vector.reduce.or.
; The checks expect four i8x16.shuffle + v128.or steps; the result is read
; with i8x16.extract_lane_u and the checked .functype returns i32.
268 define i8 @pairwise_or_v16i8(<16 x i8> %arg) {
269 ; SIMD128-LABEL: pairwise_or_v16i8:
270 ; SIMD128: .functype pairwise_or_v16i8 (v128) -> (i32)
271 ; SIMD128-NEXT: # %bb.0:
272 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
273 ; SIMD128-NEXT: v128.or $push11=, $0, $pop0
274 ; SIMD128-NEXT: local.tee $push10=, $0=, $pop11
275 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
276 ; SIMD128-NEXT: v128.or $push9=, $pop10, $pop1
277 ; SIMD128-NEXT: local.tee $push8=, $0=, $pop9
278 ; SIMD128-NEXT: i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
279 ; SIMD128-NEXT: v128.or $push7=, $pop8, $pop2
280 ; SIMD128-NEXT: local.tee $push6=, $0=, $pop7
281 ; SIMD128-NEXT: i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
282 ; SIMD128-NEXT: v128.or $push4=, $pop6, $pop3
283 ; SIMD128-NEXT: i8x16.extract_lane_u $push5=, $pop4, 0
284 ; SIMD128-NEXT: return $pop5
285 %res = tail call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %arg)
; Bitwise XOR reduction of <2 x i64> %arg via llvm.vector.reduce.xor.
; The checks expect one i8x16.shuffle feeding a v128.xor, then an extract of
; lane 0.
289 define i64 @pairwise_xor_v2i64(<2 x i64> %arg) {
290 ; SIMD128-LABEL: pairwise_xor_v2i64:
291 ; SIMD128: .functype pairwise_xor_v2i64 (v128) -> (i64)
292 ; SIMD128-NEXT: # %bb.0:
293 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
294 ; SIMD128-NEXT: v128.xor $push1=, $0, $pop0
295 ; SIMD128-NEXT: i64x2.extract_lane $push2=, $pop1, 0
296 ; SIMD128-NEXT: return $pop2
297 %res = tail call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %arg)
; Bitwise XOR reduction of <4 x i32> %arg via llvm.vector.reduce.xor.
; The checks expect two i8x16.shuffle + v128.xor steps, then an extract of
; lane 0.
301 define i32 @pairwise_xor_v4i32(<4 x i32> %arg) {
302 ; SIMD128-LABEL: pairwise_xor_v4i32:
303 ; SIMD128: .functype pairwise_xor_v4i32 (v128) -> (i32)
304 ; SIMD128-NEXT: # %bb.0:
305 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
306 ; SIMD128-NEXT: v128.xor $push5=, $0, $pop0
307 ; SIMD128-NEXT: local.tee $push4=, $0=, $pop5
308 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
309 ; SIMD128-NEXT: v128.xor $push2=, $pop4, $pop1
310 ; SIMD128-NEXT: i32x4.extract_lane $push3=, $pop2, 0
311 ; SIMD128-NEXT: return $pop3
312 %res = tail call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %arg)
; Bitwise XOR reduction of <8 x i16> %arg via llvm.vector.reduce.xor.
; The checks expect three i8x16.shuffle + v128.xor steps; the result is read
; with i16x8.extract_lane_u and the checked .functype returns i32.
316 define i16 @pairwise_xor_v8i16(<8 x i16> %arg) {
317 ; SIMD128-LABEL: pairwise_xor_v8i16:
318 ; SIMD128: .functype pairwise_xor_v8i16 (v128) -> (i32)
319 ; SIMD128-NEXT: # %bb.0:
320 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
321 ; SIMD128-NEXT: v128.xor $push8=, $0, $pop0
322 ; SIMD128-NEXT: local.tee $push7=, $0=, $pop8
323 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
324 ; SIMD128-NEXT: v128.xor $push6=, $pop7, $pop1
325 ; SIMD128-NEXT: local.tee $push5=, $0=, $pop6
326 ; SIMD128-NEXT: i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
327 ; SIMD128-NEXT: v128.xor $push3=, $pop5, $pop2
328 ; SIMD128-NEXT: i16x8.extract_lane_u $push4=, $pop3, 0
329 ; SIMD128-NEXT: return $pop4
330 %res = tail call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %arg)
; Bitwise XOR reduction of <16 x i8> %arg via llvm.vector.reduce.xor.
; The checks expect four i8x16.shuffle + v128.xor steps; the result is read
; with i8x16.extract_lane_u and the checked .functype returns i32.
334 define i8 @pairwise_xor_v16i8(<16 x i8> %arg) {
335 ; SIMD128-LABEL: pairwise_xor_v16i8:
336 ; SIMD128: .functype pairwise_xor_v16i8 (v128) -> (i32)
337 ; SIMD128-NEXT: # %bb.0:
338 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
339 ; SIMD128-NEXT: v128.xor $push11=, $0, $pop0
340 ; SIMD128-NEXT: local.tee $push10=, $0=, $pop11
341 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
342 ; SIMD128-NEXT: v128.xor $push9=, $pop10, $pop1
343 ; SIMD128-NEXT: local.tee $push8=, $0=, $pop9
344 ; SIMD128-NEXT: i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
345 ; SIMD128-NEXT: v128.xor $push7=, $pop8, $pop2
346 ; SIMD128-NEXT: local.tee $push6=, $0=, $pop7
347 ; SIMD128-NEXT: i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
348 ; SIMD128-NEXT: v128.xor $push4=, $pop6, $pop3
349 ; SIMD128-NEXT: i8x16.extract_lane_u $push5=, $pop4, 0
350 ; SIMD128-NEXT: return $pop5
351 %res = tail call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %arg)
; Signed max reduction of <2 x i64> %arg via llvm.vector.reduce.smax.
; The checks contain no i64x2 max instruction: they expect an i8x16.shuffle,
; an i64x2.gt_s compare, and a v128.bitselect, then an extract of lane 0.
355 define i64 @pairwise_smax_v2i64(<2 x i64> %arg) {
356 ; SIMD128-LABEL: pairwise_smax_v2i64:
357 ; SIMD128: .functype pairwise_smax_v2i64 (v128) -> (i64)
358 ; SIMD128-NEXT: # %bb.0:
359 ; SIMD128-NEXT: i8x16.shuffle $push4=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
360 ; SIMD128-NEXT: local.tee $push3=, $1=, $pop4
361 ; SIMD128-NEXT: i64x2.gt_s $push0=, $0, $1
362 ; SIMD128-NEXT: v128.bitselect $push1=, $0, $pop3, $pop0
363 ; SIMD128-NEXT: i64x2.extract_lane $push2=, $pop1, 0
364 ; SIMD128-NEXT: return $pop2
365 %res = tail call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %arg)
; Signed max reduction of <4 x i32> %arg via llvm.vector.reduce.smax.
; The checks expect two i8x16.shuffle + i32x4.max_s steps, then an extract of
; lane 0.
369 define i32 @pairwise_smax_v4i32(<4 x i32> %arg) {
370 ; SIMD128-LABEL: pairwise_smax_v4i32:
371 ; SIMD128: .functype pairwise_smax_v4i32 (v128) -> (i32)
372 ; SIMD128-NEXT: # %bb.0:
373 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
374 ; SIMD128-NEXT: i32x4.max_s $push5=, $0, $pop0
375 ; SIMD128-NEXT: local.tee $push4=, $0=, $pop5
376 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
377 ; SIMD128-NEXT: i32x4.max_s $push2=, $pop4, $pop1
378 ; SIMD128-NEXT: i32x4.extract_lane $push3=, $pop2, 0
379 ; SIMD128-NEXT: return $pop3
380 %res = tail call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %arg)
; Signed max reduction of <8 x i16> %arg via llvm.vector.reduce.smax.
; The checks expect three i8x16.shuffle + i16x8.max_s steps; the result is
; read with i16x8.extract_lane_u and the checked .functype returns i32.
384 define i16 @pairwise_smax_v8i16(<8 x i16> %arg) {
385 ; SIMD128-LABEL: pairwise_smax_v8i16:
386 ; SIMD128: .functype pairwise_smax_v8i16 (v128) -> (i32)
387 ; SIMD128-NEXT: # %bb.0:
388 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
389 ; SIMD128-NEXT: i16x8.max_s $push8=, $0, $pop0
390 ; SIMD128-NEXT: local.tee $push7=, $0=, $pop8
391 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
392 ; SIMD128-NEXT: i16x8.max_s $push6=, $pop7, $pop1
393 ; SIMD128-NEXT: local.tee $push5=, $0=, $pop6
394 ; SIMD128-NEXT: i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
395 ; SIMD128-NEXT: i16x8.max_s $push3=, $pop5, $pop2
396 ; SIMD128-NEXT: i16x8.extract_lane_u $push4=, $pop3, 0
397 ; SIMD128-NEXT: return $pop4
398 %res = tail call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %arg)
; Signed max reduction of <16 x i8> %arg via llvm.vector.reduce.smax.
; The checks expect four i8x16.shuffle + i8x16.max_s steps; the result is read
; with i8x16.extract_lane_u and the checked .functype returns i32.
402 define i8 @pairwise_smax_v16i8(<16 x i8> %arg) {
403 ; SIMD128-LABEL: pairwise_smax_v16i8:
404 ; SIMD128: .functype pairwise_smax_v16i8 (v128) -> (i32)
405 ; SIMD128-NEXT: # %bb.0:
406 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
407 ; SIMD128-NEXT: i8x16.max_s $push11=, $0, $pop0
408 ; SIMD128-NEXT: local.tee $push10=, $0=, $pop11
409 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
410 ; SIMD128-NEXT: i8x16.max_s $push9=, $pop10, $pop1
411 ; SIMD128-NEXT: local.tee $push8=, $0=, $pop9
412 ; SIMD128-NEXT: i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
413 ; SIMD128-NEXT: i8x16.max_s $push7=, $pop8, $pop2
414 ; SIMD128-NEXT: local.tee $push6=, $0=, $pop7
415 ; SIMD128-NEXT: i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
416 ; SIMD128-NEXT: i8x16.max_s $push4=, $pop6, $pop3
417 ; SIMD128-NEXT: i8x16.extract_lane_u $push5=, $pop4, 0
418 ; SIMD128-NEXT: return $pop5
419 %res = tail call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %arg)
; Signed min reduction of <2 x i64> %arg via llvm.vector.reduce.smin.
; The checks contain no i64x2 min instruction: they expect an i8x16.shuffle,
; an i64x2.lt_s compare, and a v128.bitselect, then an extract of lane 0.
423 define i64 @pairwise_smin_v2i64(<2 x i64> %arg) {
424 ; SIMD128-LABEL: pairwise_smin_v2i64:
425 ; SIMD128: .functype pairwise_smin_v2i64 (v128) -> (i64)
426 ; SIMD128-NEXT: # %bb.0:
427 ; SIMD128-NEXT: i8x16.shuffle $push4=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
428 ; SIMD128-NEXT: local.tee $push3=, $1=, $pop4
429 ; SIMD128-NEXT: i64x2.lt_s $push0=, $0, $1
430 ; SIMD128-NEXT: v128.bitselect $push1=, $0, $pop3, $pop0
431 ; SIMD128-NEXT: i64x2.extract_lane $push2=, $pop1, 0
432 ; SIMD128-NEXT: return $pop2
433 %res = tail call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %arg)
; Signed min reduction of <4 x i32> %arg via llvm.vector.reduce.smin.
; The checks expect two i8x16.shuffle + i32x4.min_s steps, then an extract of
; lane 0.
437 define i32 @pairwise_smin_v4i32(<4 x i32> %arg) {
438 ; SIMD128-LABEL: pairwise_smin_v4i32:
439 ; SIMD128: .functype pairwise_smin_v4i32 (v128) -> (i32)
440 ; SIMD128-NEXT: # %bb.0:
441 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
442 ; SIMD128-NEXT: i32x4.min_s $push5=, $0, $pop0
443 ; SIMD128-NEXT: local.tee $push4=, $0=, $pop5
444 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
445 ; SIMD128-NEXT: i32x4.min_s $push2=, $pop4, $pop1
446 ; SIMD128-NEXT: i32x4.extract_lane $push3=, $pop2, 0
447 ; SIMD128-NEXT: return $pop3
448 %res = tail call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %arg)
; Signed min reduction of <8 x i16> %arg via llvm.vector.reduce.smin.
; The checks expect three i8x16.shuffle + i16x8.min_s steps; the result is
; read with i16x8.extract_lane_u and the checked .functype returns i32.
452 define i16 @pairwise_smin_v8i16(<8 x i16> %arg) {
453 ; SIMD128-LABEL: pairwise_smin_v8i16:
454 ; SIMD128: .functype pairwise_smin_v8i16 (v128) -> (i32)
455 ; SIMD128-NEXT: # %bb.0:
456 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
457 ; SIMD128-NEXT: i16x8.min_s $push8=, $0, $pop0
458 ; SIMD128-NEXT: local.tee $push7=, $0=, $pop8
459 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
460 ; SIMD128-NEXT: i16x8.min_s $push6=, $pop7, $pop1
461 ; SIMD128-NEXT: local.tee $push5=, $0=, $pop6
462 ; SIMD128-NEXT: i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
463 ; SIMD128-NEXT: i16x8.min_s $push3=, $pop5, $pop2
464 ; SIMD128-NEXT: i16x8.extract_lane_u $push4=, $pop3, 0
465 ; SIMD128-NEXT: return $pop4
466 %res = tail call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %arg)
; Signed min reduction of <16 x i8> %arg via llvm.vector.reduce.smin.
; The checks expect four i8x16.shuffle + i8x16.min_s steps; the result is read
; with i8x16.extract_lane_u and the checked .functype returns i32.
470 define i8 @pairwise_smin_v16i8(<16 x i8> %arg) {
471 ; SIMD128-LABEL: pairwise_smin_v16i8:
472 ; SIMD128: .functype pairwise_smin_v16i8 (v128) -> (i32)
473 ; SIMD128-NEXT: # %bb.0:
474 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
475 ; SIMD128-NEXT: i8x16.min_s $push11=, $0, $pop0
476 ; SIMD128-NEXT: local.tee $push10=, $0=, $pop11
477 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
478 ; SIMD128-NEXT: i8x16.min_s $push9=, $pop10, $pop1
479 ; SIMD128-NEXT: local.tee $push8=, $0=, $pop9
480 ; SIMD128-NEXT: i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
481 ; SIMD128-NEXT: i8x16.min_s $push7=, $pop8, $pop2
482 ; SIMD128-NEXT: local.tee $push6=, $0=, $pop7
483 ; SIMD128-NEXT: i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
484 ; SIMD128-NEXT: i8x16.min_s $push4=, $pop6, $pop3
485 ; SIMD128-NEXT: i8x16.extract_lane_u $push5=, $pop4, 0
486 ; SIMD128-NEXT: return $pop5
487 %res = tail call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %arg)
; Unsigned max reduction of <2 x i64> %arg via llvm.vector.reduce.umax.
; The checks contain no vector compare here: lane 0 of the input and of the
; shuffled vector are compared with scalar i64.gt_u, an i64.select of -1/0 is
; inserted with i64x2.replace_lane, and that vector drives v128.bitselect.
491 define i64 @pairwise_umax_v2i64(<2 x i64> %arg) {
492 ; SIMD128-LABEL: pairwise_umax_v2i64:
493 ; SIMD128: .functype pairwise_umax_v2i64 (v128) -> (i64)
494 ; SIMD128-NEXT: # %bb.0:
495 ; SIMD128-NEXT: i8x16.shuffle $push10=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
496 ; SIMD128-NEXT: local.tee $push9=, $1=, $pop10
497 ; SIMD128-NEXT: i64.const $push4=, -1
498 ; SIMD128-NEXT: i64.const $push3=, 0
499 ; SIMD128-NEXT: i64x2.extract_lane $push1=, $0, 0
500 ; SIMD128-NEXT: i64x2.extract_lane $push0=, $1, 0
501 ; SIMD128-NEXT: i64.gt_u $push2=, $pop1, $pop0
502 ; SIMD128-NEXT: i64.select $push5=, $pop4, $pop3, $pop2
503 ; SIMD128-NEXT: i64x2.replace_lane $push6=, $0, 0, $pop5
504 ; SIMD128-NEXT: v128.bitselect $push7=, $0, $pop9, $pop6
505 ; SIMD128-NEXT: i64x2.extract_lane $push8=, $pop7, 0
506 ; SIMD128-NEXT: return $pop8
507 %res = tail call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %arg)
; Unsigned max reduction of <4 x i32> %arg via llvm.vector.reduce.umax.
; The checks expect two i8x16.shuffle + i32x4.max_u steps, then an extract of
; lane 0.
511 define i32 @pairwise_umax_v4i32(<4 x i32> %arg) {
512 ; SIMD128-LABEL: pairwise_umax_v4i32:
513 ; SIMD128: .functype pairwise_umax_v4i32 (v128) -> (i32)
514 ; SIMD128-NEXT: # %bb.0:
515 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
516 ; SIMD128-NEXT: i32x4.max_u $push5=, $0, $pop0
517 ; SIMD128-NEXT: local.tee $push4=, $0=, $pop5
518 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
519 ; SIMD128-NEXT: i32x4.max_u $push2=, $pop4, $pop1
520 ; SIMD128-NEXT: i32x4.extract_lane $push3=, $pop2, 0
521 ; SIMD128-NEXT: return $pop3
522 %res = tail call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %arg)
; Unsigned max reduction of <8 x i16> %arg via llvm.vector.reduce.umax.
; The checks expect three i8x16.shuffle + i16x8.max_u steps; the result is
; read with i16x8.extract_lane_u and the checked .functype returns i32.
526 define i16 @pairwise_umax_v8i16(<8 x i16> %arg) {
527 ; SIMD128-LABEL: pairwise_umax_v8i16:
528 ; SIMD128: .functype pairwise_umax_v8i16 (v128) -> (i32)
529 ; SIMD128-NEXT: # %bb.0:
530 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
531 ; SIMD128-NEXT: i16x8.max_u $push8=, $0, $pop0
532 ; SIMD128-NEXT: local.tee $push7=, $0=, $pop8
533 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
534 ; SIMD128-NEXT: i16x8.max_u $push6=, $pop7, $pop1
535 ; SIMD128-NEXT: local.tee $push5=, $0=, $pop6
536 ; SIMD128-NEXT: i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
537 ; SIMD128-NEXT: i16x8.max_u $push3=, $pop5, $pop2
538 ; SIMD128-NEXT: i16x8.extract_lane_u $push4=, $pop3, 0
539 ; SIMD128-NEXT: return $pop4
540 %res = tail call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %arg)
; Unsigned max reduction of <16 x i8> %arg via llvm.vector.reduce.umax.
; The checks expect four i8x16.shuffle + i8x16.max_u steps; the result is read
; with i8x16.extract_lane_u and the checked .functype returns i32.
544 define i8 @pairwise_umax_v16i8(<16 x i8> %arg) {
545 ; SIMD128-LABEL: pairwise_umax_v16i8:
546 ; SIMD128: .functype pairwise_umax_v16i8 (v128) -> (i32)
547 ; SIMD128-NEXT: # %bb.0:
548 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
549 ; SIMD128-NEXT: i8x16.max_u $push11=, $0, $pop0
550 ; SIMD128-NEXT: local.tee $push10=, $0=, $pop11
551 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
552 ; SIMD128-NEXT: i8x16.max_u $push9=, $pop10, $pop1
553 ; SIMD128-NEXT: local.tee $push8=, $0=, $pop9
554 ; SIMD128-NEXT: i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
555 ; SIMD128-NEXT: i8x16.max_u $push7=, $pop8, $pop2
556 ; SIMD128-NEXT: local.tee $push6=, $0=, $pop7
557 ; SIMD128-NEXT: i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
558 ; SIMD128-NEXT: i8x16.max_u $push4=, $pop6, $pop3
559 ; SIMD128-NEXT: i8x16.extract_lane_u $push5=, $pop4, 0
560 ; SIMD128-NEXT: return $pop5
561 %res = tail call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %arg)
; Unsigned min reduction of <2 x i64> %arg via llvm.vector.reduce.umin.
; The checks contain no vector compare here: lane 0 of the input and of the
; shuffled vector are compared with scalar i64.lt_u, an i64.select of -1/0 is
; inserted with i64x2.replace_lane, and that vector drives v128.bitselect.
565 define i64 @pairwise_umin_v2i64(<2 x i64> %arg) {
566 ; SIMD128-LABEL: pairwise_umin_v2i64:
567 ; SIMD128: .functype pairwise_umin_v2i64 (v128) -> (i64)
568 ; SIMD128-NEXT: # %bb.0:
569 ; SIMD128-NEXT: i8x16.shuffle $push10=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
570 ; SIMD128-NEXT: local.tee $push9=, $1=, $pop10
571 ; SIMD128-NEXT: i64.const $push4=, -1
572 ; SIMD128-NEXT: i64.const $push3=, 0
573 ; SIMD128-NEXT: i64x2.extract_lane $push1=, $0, 0
574 ; SIMD128-NEXT: i64x2.extract_lane $push0=, $1, 0
575 ; SIMD128-NEXT: i64.lt_u $push2=, $pop1, $pop0
576 ; SIMD128-NEXT: i64.select $push5=, $pop4, $pop3, $pop2
577 ; SIMD128-NEXT: i64x2.replace_lane $push6=, $0, 0, $pop5
578 ; SIMD128-NEXT: v128.bitselect $push7=, $0, $pop9, $pop6
579 ; SIMD128-NEXT: i64x2.extract_lane $push8=, $pop7, 0
580 ; SIMD128-NEXT: return $pop8
581 %res = tail call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %arg)
; Unsigned min reduction of <4 x i32> %arg via llvm.vector.reduce.umin.
; The checks expect two i8x16.shuffle + i32x4.min_u steps, then an extract of
; lane 0.
585 define i32 @pairwise_umin_v4i32(<4 x i32> %arg) {
586 ; SIMD128-LABEL: pairwise_umin_v4i32:
587 ; SIMD128: .functype pairwise_umin_v4i32 (v128) -> (i32)
588 ; SIMD128-NEXT: # %bb.0:
589 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
590 ; SIMD128-NEXT: i32x4.min_u $push5=, $0, $pop0
591 ; SIMD128-NEXT: local.tee $push4=, $0=, $pop5
592 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
593 ; SIMD128-NEXT: i32x4.min_u $push2=, $pop4, $pop1
594 ; SIMD128-NEXT: i32x4.extract_lane $push3=, $pop2, 0
595 ; SIMD128-NEXT: return $pop3
596 %res = tail call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %arg)
; Unsigned min reduction of <8 x i16> %arg via llvm.vector.reduce.umin.
; The checks expect three i8x16.shuffle + i16x8.min_u steps; the result is
; read with i16x8.extract_lane_u and the checked .functype returns i32.
600 define i16 @pairwise_umin_v8i16(<8 x i16> %arg) {
601 ; SIMD128-LABEL: pairwise_umin_v8i16:
602 ; SIMD128: .functype pairwise_umin_v8i16 (v128) -> (i32)
603 ; SIMD128-NEXT: # %bb.0:
604 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
605 ; SIMD128-NEXT: i16x8.min_u $push8=, $0, $pop0
606 ; SIMD128-NEXT: local.tee $push7=, $0=, $pop8
607 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
608 ; SIMD128-NEXT: i16x8.min_u $push6=, $pop7, $pop1
609 ; SIMD128-NEXT: local.tee $push5=, $0=, $pop6
610 ; SIMD128-NEXT: i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
611 ; SIMD128-NEXT: i16x8.min_u $push3=, $pop5, $pop2
612 ; SIMD128-NEXT: i16x8.extract_lane_u $push4=, $pop3, 0
613 ; SIMD128-NEXT: return $pop4
614 %res = tail call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %arg)
; Unsigned min reduction of <16 x i8> %arg via llvm.vector.reduce.umin.
; The checks expect four i8x16.shuffle + i8x16.min_u steps; the result is read
; with i8x16.extract_lane_u and the checked .functype returns i32.
618 define i8 @pairwise_umin_v16i8(<16 x i8> %arg) {
619 ; SIMD128-LABEL: pairwise_umin_v16i8:
620 ; SIMD128: .functype pairwise_umin_v16i8 (v128) -> (i32)
621 ; SIMD128-NEXT: # %bb.0:
622 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
623 ; SIMD128-NEXT: i8x16.min_u $push11=, $0, $pop0
624 ; SIMD128-NEXT: local.tee $push10=, $0=, $pop11
625 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
626 ; SIMD128-NEXT: i8x16.min_u $push9=, $pop10, $pop1
627 ; SIMD128-NEXT: local.tee $push8=, $0=, $pop9
628 ; SIMD128-NEXT: i8x16.shuffle $push2=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
629 ; SIMD128-NEXT: i8x16.min_u $push7=, $pop8, $pop2
630 ; SIMD128-NEXT: local.tee $push6=, $0=, $pop7
631 ; SIMD128-NEXT: i8x16.shuffle $push3=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
632 ; SIMD128-NEXT: i8x16.min_u $push4=, $pop6, $pop3
633 ; SIMD128-NEXT: i8x16.extract_lane_u $push5=, $pop4, 0
634 ; SIMD128-NEXT: return $pop5
635 %res = tail call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %arg)
; Floating-point add reduction of <2 x double> %arg via
; llvm.vector.reduce.fadd with a -0.0 start value and no fast-math flags on
; the call. The checks expect both lanes to be extracted and summed with a
; scalar f64.add; no vector add appears.
639 define double @pairwise_add_v2f64(<2 x double> %arg) {
640 ; SIMD128-LABEL: pairwise_add_v2f64:
641 ; SIMD128: .functype pairwise_add_v2f64 (v128) -> (f64)
642 ; SIMD128-NEXT: # %bb.0:
643 ; SIMD128-NEXT: f64x2.extract_lane $push1=, $0, 0
644 ; SIMD128-NEXT: f64x2.extract_lane $push0=, $0, 1
645 ; SIMD128-NEXT: f64.add $push2=, $pop1, $pop0
646 ; SIMD128-NEXT: return $pop2
647 %res = tail call double @llvm.vector.reduce.fadd.v2f64(double -0.0, <2 x double> %arg)
; Same <2 x double> fadd reduction (start value -0.0), but the call carries the
; `fast` flag. The checks expect the vector form instead: one i8x16.shuffle
; feeding an f64x2.add, then an extract of lane 0.
651 define double @pairwise_add_v2f64_fast(<2 x double> %arg) {
652 ; SIMD128-LABEL: pairwise_add_v2f64_fast:
653 ; SIMD128: .functype pairwise_add_v2f64_fast (v128) -> (f64)
654 ; SIMD128-NEXT: # %bb.0:
655 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
656 ; SIMD128-NEXT: f64x2.add $push1=, $0, $pop0
657 ; SIMD128-NEXT: f64x2.extract_lane $push2=, $pop1, 0
658 ; SIMD128-NEXT: return $pop2
659 %res = tail call fast double @llvm.vector.reduce.fadd.v2f64(double -0.0, <2 x double> %arg)
; fadd reduction of <4 x float> (start value -0.0) with no fast-math flags
; on the call. Expected output is a chain of three scalar f32.add
; instructions over lanes 0, 1, 2, 3 extracted in that order.
663 define float @pairwise_add_v4f32(<4 x float> %arg) {
664 ; SIMD128-LABEL: pairwise_add_v4f32:
665 ; SIMD128: .functype pairwise_add_v4f32 (v128) -> (f32)
666 ; SIMD128-NEXT: # %bb.0:
667 ; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 0
668 ; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 1
669 ; SIMD128-NEXT: f32.add $push2=, $pop1, $pop0
670 ; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 2
671 ; SIMD128-NEXT: f32.add $push4=, $pop2, $pop3
672 ; SIMD128-NEXT: f32x4.extract_lane $push5=, $0, 3
673 ; SIMD128-NEXT: f32.add $push6=, $pop4, $pop5
674 ; SIMD128-NEXT: return $pop6
675 %res = tail call float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %arg)
; fadd reduction of <4 x float> (start value -0.0) with `fast` on the call.
; Expected output is two rounds of i8x16.shuffle + vector f32x4.add, then
; an extract of lane 0; no scalar f32.add is expected.
679 define float @pairwise_add_v4f32_fast(<4 x float> %arg) {
680 ; SIMD128-LABEL: pairwise_add_v4f32_fast:
681 ; SIMD128: .functype pairwise_add_v4f32_fast (v128) -> (f32)
682 ; SIMD128-NEXT: # %bb.0:
683 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 0, 1, 2, 3
684 ; SIMD128-NEXT: f32x4.add $push5=, $0, $pop0
685 ; SIMD128-NEXT: local.tee $push4=, $0=, $pop5
686 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
687 ; SIMD128-NEXT: f32x4.add $push2=, $pop4, $pop1
688 ; SIMD128-NEXT: f32x4.extract_lane $push3=, $pop2, 0
689 ; SIMD128-NEXT: return $pop3
690 %res = tail call fast float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %arg)
; fadd reduction of <4 x float> (start value -0.0) with only `reassoc` on
; the call. Expected output is two rounds of i8x16.shuffle + vector
; f32x4.add, then an extract of lane 0.
694 define float @pairwise_add_v4f32_reassoc(<4 x float> %arg) {
695 ; SIMD128-LABEL: pairwise_add_v4f32_reassoc:
696 ; SIMD128: .functype pairwise_add_v4f32_reassoc (v128) -> (f32)
697 ; SIMD128-NEXT: # %bb.0:
698 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 0, 1, 2, 3
699 ; SIMD128-NEXT: f32x4.add $push5=, $0, $pop0
700 ; SIMD128-NEXT: local.tee $push4=, $0=, $pop5
701 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
702 ; SIMD128-NEXT: f32x4.add $push2=, $pop4, $pop1
703 ; SIMD128-NEXT: f32x4.extract_lane $push3=, $pop2, 0
704 ; SIMD128-NEXT: return $pop3
705 %res = tail call reassoc float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %arg)
; fmul reduction of <2 x double> with start value -0.0 and no fast-math
; flags on the call. Expected output multiplies lane 0 by the constant
; -0x0p0 and then by lane 1, using scalar f64.mul.
; NOTE(review): the start value -0.0 is not the multiplicative identity
; (1.0); presumably it was carried over from the fadd tests -- confirm
; whether 1.0 was intended.
709 define double @pairwise_mul_v2f64(<2 x double> %arg) {
710 ; SIMD128-LABEL: pairwise_mul_v2f64:
711 ; SIMD128: .functype pairwise_mul_v2f64 (v128) -> (f64)
712 ; SIMD128-NEXT: # %bb.0:
713 ; SIMD128-NEXT: f64x2.extract_lane $push0=, $0, 0
714 ; SIMD128-NEXT: f64.const $push1=, -0x0p0
715 ; SIMD128-NEXT: f64.mul $push2=, $pop0, $pop1
716 ; SIMD128-NEXT: f64x2.extract_lane $push3=, $0, 1
717 ; SIMD128-NEXT: f64.mul $push4=, $pop2, $pop3
718 ; SIMD128-NEXT: return $pop4
719 %res = tail call double @llvm.vector.reduce.fmul.v2f64(double -0.0, <2 x double> %arg)
; fmul reduction of <2 x double> with start value -0.0 and `fast` on the
; call. Expected output is only the constant 0x0p0 being returned; no
; shuffle or multiply is expected.
; NOTE(review): because the start value is -0.0 rather than 1.0, this test
; appears to check constant folding instead of the vector reduction
; lowering -- confirm whether 1.0 was intended.
723 define double @pairwise_mul_v2f64_fast(<2 x double> %arg) {
724 ; SIMD128-LABEL: pairwise_mul_v2f64_fast:
725 ; SIMD128: .functype pairwise_mul_v2f64_fast (v128) -> (f64)
726 ; SIMD128-NEXT: # %bb.0:
727 ; SIMD128-NEXT: f64.const $push0=, 0x0p0
728 ; SIMD128-NEXT: return $pop0
729 %res = tail call fast double @llvm.vector.reduce.fmul.v2f64(double -0.0, <2 x double> %arg)
; fmul reduction of <4 x float> with start value -0.0 and no fast-math
; flags on the call. Expected output multiplies lane 0 by the constant
; -0x0p0 and then by lanes 1, 2 and 3 in order, using scalar f32.mul.
; NOTE(review): the start value -0.0 is not the multiplicative identity
; (1.0) -- confirm whether 1.0 was intended.
733 define float @pairwise_mul_v4f32(<4 x float> %arg) {
734 ; SIMD128-LABEL: pairwise_mul_v4f32:
735 ; SIMD128: .functype pairwise_mul_v4f32 (v128) -> (f32)
736 ; SIMD128-NEXT: # %bb.0:
737 ; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 0
738 ; SIMD128-NEXT: f32.const $push1=, -0x0p0
739 ; SIMD128-NEXT: f32.mul $push2=, $pop0, $pop1
740 ; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 1
741 ; SIMD128-NEXT: f32.mul $push4=, $pop2, $pop3
742 ; SIMD128-NEXT: f32x4.extract_lane $push5=, $0, 2
743 ; SIMD128-NEXT: f32.mul $push6=, $pop4, $pop5
744 ; SIMD128-NEXT: f32x4.extract_lane $push7=, $0, 3
745 ; SIMD128-NEXT: f32.mul $push8=, $pop6, $pop7
746 ; SIMD128-NEXT: return $pop8
747 %res = tail call float @llvm.vector.reduce.fmul.v4f32(float -0.0, <4 x float> %arg)
; fmul reduction of <4 x float> with start value -0.0 and `fast` on the
; call. Expected output is only the constant 0x0p0 being returned; no
; shuffle or multiply is expected.
; NOTE(review): because the start value is -0.0 rather than 1.0, this test
; appears to check constant folding instead of the vector reduction
; lowering -- confirm whether 1.0 was intended.
751 define float @pairwise_mul_v4f32_fast(<4 x float> %arg) {
752 ; SIMD128-LABEL: pairwise_mul_v4f32_fast:
753 ; SIMD128: .functype pairwise_mul_v4f32_fast (v128) -> (f32)
754 ; SIMD128-NEXT: # %bb.0:
755 ; SIMD128-NEXT: f32.const $push0=, 0x0p0
756 ; SIMD128-NEXT: return $pop0
757 %res = tail call fast float @llvm.vector.reduce.fmul.v4f32(float -0.0, <4 x float> %arg)
; fmul reduction of <4 x float> with start value -0.0 and only `reassoc`
; on the call. Expected output is two rounds of i8x16.shuffle + vector
; f32x4.mul, an extract of lane 0, and a final scalar f32.mul by the
; constant -0x0p0.
; NOTE(review): the start value -0.0 is not the multiplicative identity
; (1.0) -- confirm whether 1.0 was intended.
761 define float @pairwise_mul_v4f32_reassoc(<4 x float> %arg) {
762 ; SIMD128-LABEL: pairwise_mul_v4f32_reassoc:
763 ; SIMD128: .functype pairwise_mul_v4f32_reassoc (v128) -> (f32)
764 ; SIMD128-NEXT: # %bb.0:
765 ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
766 ; SIMD128-NEXT: f32x4.mul $push7=, $0, $pop0
767 ; SIMD128-NEXT: local.tee $push6=, $0=, $pop7
768 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
769 ; SIMD128-NEXT: f32x4.mul $push2=, $pop6, $pop1
770 ; SIMD128-NEXT: f32x4.extract_lane $push3=, $pop2, 0
771 ; SIMD128-NEXT: f32.const $push4=, -0x0p0
772 ; SIMD128-NEXT: f32.mul $push5=, $pop3, $pop4
773 ; SIMD128-NEXT: return $pop5
774 %res = tail call reassoc float @llvm.vector.reduce.fmul.v4f32(float -0.0, <4 x float> %arg)
; fmax reduction of <2 x double> with no fast-math flags on the call.
; Expected output extracts lanes 0 and 1 and passes them to a call to
; `fmax`; no vector compare or select is expected.
778 define double @pairwise_max_v2f64(<2 x double> %arg) {
779 ; SIMD128-LABEL: pairwise_max_v2f64:
780 ; SIMD128: .functype pairwise_max_v2f64 (v128) -> (f64)
781 ; SIMD128-NEXT: # %bb.0:
782 ; SIMD128-NEXT: f64x2.extract_lane $push1=, $0, 0
783 ; SIMD128-NEXT: f64x2.extract_lane $push0=, $0, 1
784 ; SIMD128-NEXT: call $push2=, fmax, $pop1, $pop0
785 ; SIMD128-NEXT: return $pop2
786 %res = tail call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %arg)
; fmax reduction of <2 x double> with `fast` on the call. Expected output
; is an i8x16.shuffle that swaps the two 8-byte halves, an f64x2.gt compare
; of the input against the shuffled vector, a v128.bitselect driven by that
; mask, and an extract of lane 0.
790 define double @pairwise_max_v2f64_fast(<2 x double> %arg) {
791 ; SIMD128-LABEL: pairwise_max_v2f64_fast:
792 ; SIMD128: .functype pairwise_max_v2f64_fast (v128) -> (f64)
793 ; SIMD128-NEXT: # %bb.0:
794 ; SIMD128-NEXT: i8x16.shuffle $push4=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
795 ; SIMD128-NEXT: local.tee $push3=, $1=, $pop4
796 ; SIMD128-NEXT: f64x2.gt $push0=, $0, $1
797 ; SIMD128-NEXT: v128.bitselect $push1=, $0, $pop3, $pop0
798 ; SIMD128-NEXT: f64x2.extract_lane $push2=, $pop1, 0
799 ; SIMD128-NEXT: return $pop2
800 %res = tail call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %arg)
; fmax reduction of <4 x float> with no fast-math flags on the call.
; Expected output is a chain of three calls to `fmaxf` over lanes 0, 1, 2
; and 3; no vector compare or select is expected.
804 define float @pairwise_max_v4f32(<4 x float> %arg) {
805 ; SIMD128-LABEL: pairwise_max_v4f32:
806 ; SIMD128: .functype pairwise_max_v4f32 (v128) -> (f32)
807 ; SIMD128-NEXT: # %bb.0:
808 ; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 0
809 ; SIMD128-NEXT: f32x4.extract_lane $push2=, $0, 1
810 ; SIMD128-NEXT: call $push4=, fmaxf, $pop3, $pop2
811 ; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 2
812 ; SIMD128-NEXT: call $push5=, fmaxf, $pop4, $pop1
813 ; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 3
814 ; SIMD128-NEXT: call $push6=, fmaxf, $pop5, $pop0
815 ; SIMD128-NEXT: return $pop6
816 %res = tail call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %arg)
; fmax reduction of <4 x float> with `fast` on the call. Expected output
; is two rounds of i8x16.shuffle, f32x4.gt compare and v128.bitselect,
; followed by an extract of lane 0; no call to `fmaxf` is expected.
820 define float @pairwise_max_v4f32_fast(<4 x float> %arg) {
821 ; SIMD128-LABEL: pairwise_max_v4f32_fast:
822 ; SIMD128: .functype pairwise_max_v4f32_fast (v128) -> (f32)
823 ; SIMD128-NEXT: # %bb.0:
824 ; SIMD128-NEXT: i8x16.shuffle $push9=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
825 ; SIMD128-NEXT: local.tee $push8=, $1=, $pop9
826 ; SIMD128-NEXT: f32x4.gt $push0=, $0, $1
827 ; SIMD128-NEXT: v128.bitselect $push7=, $0, $pop8, $pop0
828 ; SIMD128-NEXT: local.tee $push6=, $0=, $pop7
829 ; SIMD128-NEXT: i8x16.shuffle $push5=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
830 ; SIMD128-NEXT: local.tee $push4=, $1=, $pop5
831 ; SIMD128-NEXT: f32x4.gt $push1=, $0, $1
832 ; SIMD128-NEXT: v128.bitselect $push2=, $pop6, $pop4, $pop1
833 ; SIMD128-NEXT: f32x4.extract_lane $push3=, $pop2, 0
834 ; SIMD128-NEXT: return $pop3
835 %res = tail call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %arg)
; fmax reduction of <4 x float> with only `reassoc` on the call. Expected
; output is a chain of three calls to `fmaxf` over lanes 0, 1, 2 and 3;
; no vector compare or select is expected with this flag alone.
839 define float @pairwise_max_v4f32_reassoc(<4 x float> %arg) {
840 ; SIMD128-LABEL: pairwise_max_v4f32_reassoc:
841 ; SIMD128: .functype pairwise_max_v4f32_reassoc (v128) -> (f32)
842 ; SIMD128-NEXT: # %bb.0:
843 ; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 0
844 ; SIMD128-NEXT: f32x4.extract_lane $push2=, $0, 1
845 ; SIMD128-NEXT: call $push4=, fmaxf, $pop3, $pop2
846 ; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 2
847 ; SIMD128-NEXT: call $push5=, fmaxf, $pop4, $pop1
848 ; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 3
849 ; SIMD128-NEXT: call $push6=, fmaxf, $pop5, $pop0
850 ; SIMD128-NEXT: return $pop6
851 %res = tail call reassoc float @llvm.vector.reduce.fmax.v4f32(<4 x float> %arg)
; fmin reduction of <2 x double> with no fast-math flags on the call.
; Expected output extracts lanes 0 and 1 and passes them to a call to
; `fmin`; no vector compare or select is expected.
855 define double @pairwise_min_v2f64(<2 x double> %arg) {
856 ; SIMD128-LABEL: pairwise_min_v2f64:
857 ; SIMD128: .functype pairwise_min_v2f64 (v128) -> (f64)
858 ; SIMD128-NEXT: # %bb.0:
859 ; SIMD128-NEXT: f64x2.extract_lane $push1=, $0, 0
860 ; SIMD128-NEXT: f64x2.extract_lane $push0=, $0, 1
861 ; SIMD128-NEXT: call $push2=, fmin, $pop1, $pop0
862 ; SIMD128-NEXT: return $pop2
863 %res = tail call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %arg)
; fmin reduction of <2 x double> with `fast` on the call. Expected output
; is an i8x16.shuffle that swaps the two 8-byte halves, an f64x2.lt compare
; of the input against the shuffled vector, a v128.bitselect driven by that
; mask, and an extract of lane 0.
867 define double @pairwise_min_v2f64_fast(<2 x double> %arg) {
868 ; SIMD128-LABEL: pairwise_min_v2f64_fast:
869 ; SIMD128: .functype pairwise_min_v2f64_fast (v128) -> (f64)
870 ; SIMD128-NEXT: # %bb.0:
871 ; SIMD128-NEXT: i8x16.shuffle $push4=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
872 ; SIMD128-NEXT: local.tee $push3=, $1=, $pop4
873 ; SIMD128-NEXT: f64x2.lt $push0=, $0, $1
874 ; SIMD128-NEXT: v128.bitselect $push1=, $0, $pop3, $pop0
875 ; SIMD128-NEXT: f64x2.extract_lane $push2=, $pop1, 0
876 ; SIMD128-NEXT: return $pop2
877 %res = tail call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %arg)
; fmin reduction of <4 x float> with no fast-math flags on the call.
; Expected output is a chain of three calls to `fminf` over lanes 0, 1, 2
; and 3; no vector compare or select is expected.
881 define float @pairwise_min_v4f32(<4 x float> %arg) {
882 ; SIMD128-LABEL: pairwise_min_v4f32:
883 ; SIMD128: .functype pairwise_min_v4f32 (v128) -> (f32)
884 ; SIMD128-NEXT: # %bb.0:
885 ; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 0
886 ; SIMD128-NEXT: f32x4.extract_lane $push2=, $0, 1
887 ; SIMD128-NEXT: call $push4=, fminf, $pop3, $pop2
888 ; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 2
889 ; SIMD128-NEXT: call $push5=, fminf, $pop4, $pop1
890 ; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 3
891 ; SIMD128-NEXT: call $push6=, fminf, $pop5, $pop0
892 ; SIMD128-NEXT: return $pop6
893 %res = tail call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %arg)
; fmin reduction of <4 x float> with `fast` on the call. Expected output
; is two rounds of i8x16.shuffle, f32x4.lt compare and v128.bitselect,
; followed by an extract of lane 0; no call to `fminf` is expected.
897 define float @pairwise_min_v4f32_fast(<4 x float> %arg) {
898 ; SIMD128-LABEL: pairwise_min_v4f32_fast:
899 ; SIMD128: .functype pairwise_min_v4f32_fast (v128) -> (f32)
900 ; SIMD128-NEXT: # %bb.0:
901 ; SIMD128-NEXT: i8x16.shuffle $push9=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
902 ; SIMD128-NEXT: local.tee $push8=, $1=, $pop9
903 ; SIMD128-NEXT: f32x4.lt $push0=, $0, $1
904 ; SIMD128-NEXT: v128.bitselect $push7=, $0, $pop8, $pop0
905 ; SIMD128-NEXT: local.tee $push6=, $0=, $pop7
906 ; SIMD128-NEXT: i8x16.shuffle $push5=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
907 ; SIMD128-NEXT: local.tee $push4=, $1=, $pop5
908 ; SIMD128-NEXT: f32x4.lt $push1=, $0, $1
909 ; SIMD128-NEXT: v128.bitselect $push2=, $pop6, $pop4, $pop1
910 ; SIMD128-NEXT: f32x4.extract_lane $push3=, $pop2, 0
911 ; SIMD128-NEXT: return $pop3
912 %res = tail call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %arg)
; fmin reduction of <4 x float> with only `reassoc` on the call. Expected
; output is a chain of three calls to `fminf` over lanes 0, 1, 2 and 3;
; no vector compare or select is expected with this flag alone.
916 define float @pairwise_min_v4f32_reassoc(<4 x float> %arg) {
917 ; SIMD128-LABEL: pairwise_min_v4f32_reassoc:
918 ; SIMD128: .functype pairwise_min_v4f32_reassoc (v128) -> (f32)
919 ; SIMD128-NEXT: # %bb.0:
920 ; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 0
921 ; SIMD128-NEXT: f32x4.extract_lane $push2=, $0, 1
922 ; SIMD128-NEXT: call $push4=, fminf, $pop3, $pop2
923 ; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 2
924 ; SIMD128-NEXT: call $push5=, fminf, $pop4, $pop1
925 ; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 3
926 ; SIMD128-NEXT: call $push6=, fminf, $pop5, $pop0
927 ; SIMD128-NEXT: return $pop6
928 %res = tail call reassoc float @llvm.vector.reduce.fmin.v4f32(<4 x float> %arg)
; fmaximum reduction of <2 x double> with no fast-math flags on the call.
; Expected output extracts lanes 0 and 1 and combines them with a scalar
; f64.max instruction.
932 define double @pairwise_maximum_v2f64(<2 x double> %arg) {
933 ; SIMD128-LABEL: pairwise_maximum_v2f64:
934 ; SIMD128: .functype pairwise_maximum_v2f64 (v128) -> (f64)
935 ; SIMD128-NEXT: # %bb.0:
936 ; SIMD128-NEXT: f64x2.extract_lane $push1=, $0, 0
937 ; SIMD128-NEXT: f64x2.extract_lane $push0=, $0, 1
938 ; SIMD128-NEXT: f64.max $push2=, $pop1, $pop0
939 ; SIMD128-NEXT: return $pop2
940 %res = tail call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %arg)
; fmaximum reduction of <2 x double> with `fast` on the call. Expected
; output is still scalar: lanes 0 and 1 are extracted and combined with
; f64.max; no shuffle or vector operation is expected.
944 define double @pairwise_maximum_v2f64_fast(<2 x double> %arg) {
945 ; SIMD128-LABEL: pairwise_maximum_v2f64_fast:
946 ; SIMD128: .functype pairwise_maximum_v2f64_fast (v128) -> (f64)
947 ; SIMD128-NEXT: # %bb.0:
948 ; SIMD128-NEXT: f64x2.extract_lane $push1=, $0, 0
949 ; SIMD128-NEXT: f64x2.extract_lane $push0=, $0, 1
950 ; SIMD128-NEXT: f64.max $push2=, $pop1, $pop0
951 ; SIMD128-NEXT: return $pop2
952 %res = tail call fast double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %arg)
; fmaximum reduction of <4 x float> with no fast-math flags on the call.
; Expected output is a chain of three scalar f32.max instructions over
; lanes 0, 1, 2 and 3 extracted in that order.
956 define float @pairwise_maximum_v4f32(<4 x float> %arg) {
957 ; SIMD128-LABEL: pairwise_maximum_v4f32:
958 ; SIMD128: .functype pairwise_maximum_v4f32 (v128) -> (f32)
959 ; SIMD128-NEXT: # %bb.0:
960 ; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 0
961 ; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 1
962 ; SIMD128-NEXT: f32.max $push2=, $pop1, $pop0
963 ; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 2
964 ; SIMD128-NEXT: f32.max $push4=, $pop2, $pop3
965 ; SIMD128-NEXT: f32x4.extract_lane $push5=, $0, 3
966 ; SIMD128-NEXT: f32.max $push6=, $pop4, $pop5
967 ; SIMD128-NEXT: return $pop6
968 %res = tail call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %arg)
; fmaximum reduction of <4 x float> with `fast` on the call. Expected
; output is still a chain of three scalar f32.max instructions over lanes
; 0, 1, 2 and 3; no shuffle or vector operation is expected.
972 define float @pairwise_maximum_v4f32_fast(<4 x float> %arg) {
973 ; SIMD128-LABEL: pairwise_maximum_v4f32_fast:
974 ; SIMD128: .functype pairwise_maximum_v4f32_fast (v128) -> (f32)
975 ; SIMD128-NEXT: # %bb.0:
976 ; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 0
977 ; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 1
978 ; SIMD128-NEXT: f32.max $push2=, $pop1, $pop0
979 ; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 2
980 ; SIMD128-NEXT: f32.max $push4=, $pop2, $pop3
981 ; SIMD128-NEXT: f32x4.extract_lane $push5=, $0, 3
982 ; SIMD128-NEXT: f32.max $push6=, $pop4, $pop5
983 ; SIMD128-NEXT: return $pop6
984 %res = tail call fast float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %arg)
; fmaximum reduction of <4 x float> with only `reassoc` on the call.
; Expected output is a chain of three scalar f32.max instructions over
; lanes 0, 1, 2 and 3; no shuffle or vector operation is expected.
988 define float @pairwise_maximum_v4f32_reassoc(<4 x float> %arg) {
989 ; SIMD128-LABEL: pairwise_maximum_v4f32_reassoc:
990 ; SIMD128: .functype pairwise_maximum_v4f32_reassoc (v128) -> (f32)
991 ; SIMD128-NEXT: # %bb.0:
992 ; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 0
993 ; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 1
994 ; SIMD128-NEXT: f32.max $push2=, $pop1, $pop0
995 ; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 2
996 ; SIMD128-NEXT: f32.max $push4=, $pop2, $pop3
997 ; SIMD128-NEXT: f32x4.extract_lane $push5=, $0, 3
998 ; SIMD128-NEXT: f32.max $push6=, $pop4, $pop5
999 ; SIMD128-NEXT: return $pop6
1000 %res = tail call reassoc float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %arg)
; fminimum reduction of <2 x double> with no fast-math flags on the call.
; Expected output extracts lanes 0 and 1 and combines them with a scalar
; f64.min instruction.
1004 define double @pairwise_minimum_v2f64(<2 x double> %arg) {
1005 ; SIMD128-LABEL: pairwise_minimum_v2f64:
1006 ; SIMD128: .functype pairwise_minimum_v2f64 (v128) -> (f64)
1007 ; SIMD128-NEXT: # %bb.0:
1008 ; SIMD128-NEXT: f64x2.extract_lane $push1=, $0, 0
1009 ; SIMD128-NEXT: f64x2.extract_lane $push0=, $0, 1
1010 ; SIMD128-NEXT: f64.min $push2=, $pop1, $pop0
1011 ; SIMD128-NEXT: return $pop2
1012 %res = tail call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %arg)
; fminimum reduction of <2 x double> with `fast` on the call. Expected
; output is still scalar: lanes 0 and 1 are extracted and combined with
; f64.min; no shuffle or vector operation is expected.
1016 define double @pairwise_minimum_v2f64_fast(<2 x double> %arg) {
1017 ; SIMD128-LABEL: pairwise_minimum_v2f64_fast:
1018 ; SIMD128: .functype pairwise_minimum_v2f64_fast (v128) -> (f64)
1019 ; SIMD128-NEXT: # %bb.0:
1020 ; SIMD128-NEXT: f64x2.extract_lane $push1=, $0, 0
1021 ; SIMD128-NEXT: f64x2.extract_lane $push0=, $0, 1
1022 ; SIMD128-NEXT: f64.min $push2=, $pop1, $pop0
1023 ; SIMD128-NEXT: return $pop2
1024 %res = tail call fast double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %arg)
; fminimum reduction of <4 x float> with no fast-math flags on the call.
; Expected output is a chain of three scalar f32.min instructions over
; lanes 0, 1, 2 and 3 extracted in that order.
1028 define float @pairwise_minimum_v4f32(<4 x float> %arg) {
1029 ; SIMD128-LABEL: pairwise_minimum_v4f32:
1030 ; SIMD128: .functype pairwise_minimum_v4f32 (v128) -> (f32)
1031 ; SIMD128-NEXT: # %bb.0:
1032 ; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 0
1033 ; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 1
1034 ; SIMD128-NEXT: f32.min $push2=, $pop1, $pop0
1035 ; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 2
1036 ; SIMD128-NEXT: f32.min $push4=, $pop2, $pop3
1037 ; SIMD128-NEXT: f32x4.extract_lane $push5=, $0, 3
1038 ; SIMD128-NEXT: f32.min $push6=, $pop4, $pop5
1039 ; SIMD128-NEXT: return $pop6
1040 %res = tail call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %arg)
; fminimum reduction of <4 x float> with `fast` on the call. Expected
; output is still a chain of three scalar f32.min instructions over lanes
; 0, 1, 2 and 3; no shuffle or vector operation is expected.
1044 define float @pairwise_minimum_v4f32_fast(<4 x float> %arg) {
1045 ; SIMD128-LABEL: pairwise_minimum_v4f32_fast:
1046 ; SIMD128: .functype pairwise_minimum_v4f32_fast (v128) -> (f32)
1047 ; SIMD128-NEXT: # %bb.0:
1048 ; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 0
1049 ; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 1
1050 ; SIMD128-NEXT: f32.min $push2=, $pop1, $pop0
1051 ; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 2
1052 ; SIMD128-NEXT: f32.min $push4=, $pop2, $pop3
1053 ; SIMD128-NEXT: f32x4.extract_lane $push5=, $0, 3
1054 ; SIMD128-NEXT: f32.min $push6=, $pop4, $pop5
1055 ; SIMD128-NEXT: return $pop6
1056 %res = tail call fast float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %arg)
; fminimum reduction of <4 x float> with only `reassoc` on the call.
; Expected output is a chain of three scalar f32.min instructions over
; lanes 0, 1, 2 and 3; no shuffle or vector operation is expected.
1060 define float @pairwise_minimum_v4f32_reassoc(<4 x float> %arg) {
1061 ; SIMD128-LABEL: pairwise_minimum_v4f32_reassoc:
1062 ; SIMD128: .functype pairwise_minimum_v4f32_reassoc (v128) -> (f32)
1063 ; SIMD128-NEXT: # %bb.0:
1064 ; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 0
1065 ; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 1
1066 ; SIMD128-NEXT: f32.min $push2=, $pop1, $pop0
1067 ; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 2
1068 ; SIMD128-NEXT: f32.min $push4=, $pop2, $pop3
1069 ; SIMD128-NEXT: f32x4.extract_lane $push5=, $0, 3
1070 ; SIMD128-NEXT: f32.min $push6=, $pop4, $pop5
1071 ; SIMD128-NEXT: return $pop6
1072 %res = tail call reassoc float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %arg)