1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2 ; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mattr=+v -pass-remarks-output=%t < %s | FileCheck %s
3 ; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
5 ; YAML-LABEL: --- !Passed
6 ; YAML-NEXT: Pass: slp-vectorizer
7 ; YAML-NEXT: Name: VectorizedHorizontalReduction
8 ; YAML-NEXT: Function: test
10 ; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
11 ; YAML-NEXT: - Cost: '-10'
12 ; YAML-NEXT: - String: ' and with tree size '
13 ; YAML-NEXT: - TreeSize: '8'
15 define i32 @test(i32 %a, i8 %b, i8 %c) {
16 ; CHECK-LABEL: define i32 @test(
17 ; CHECK-SAME: i32 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
19 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[C]], i32 0
20 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> zeroinitializer
21 ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[TMP1]], <i8 -1, i8 -2, i8 -3, i8 -4>
22 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i32 0
23 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
24 ; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16>
25 ; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i8> [[TMP4]] to <4 x i16>
26 ; CHECK-NEXT: [[TMP5:%.*]] = icmp sle <4 x i16> [[TMP8]], [[TMP9]]
27 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i1> [[TMP5]] to i4
28 ; CHECK-NEXT: [[TMP11:%.*]] = call i4 @llvm.ctpop.i4(i4 [[TMP10]])
29 ; CHECK-NEXT: [[TMP7:%.*]] = zext i4 [[TMP11]] to i32
30 ; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP7]], [[A]]
31 ; CHECK-NEXT: ret i32 [[OP_RDX]]
35 %dec19 = add i8 %c, -1
36 %conv20 = zext i8 %dec19 to i32
37 %conv16.1 = sext i8 %b to i32
38 %cmp17.1 = icmp sle i32 %conv20, %conv16.1
39 %conv18.1 = zext i1 %cmp17.1 to i32
40 %a.1 = add nsw i32 %conv18.1, %a
41 %dec19.1 = add i8 %c, -2
42 %conv20.1 = zext i8 %dec19.1 to i32
43 %conv16.2 = sext i8 %b to i32
44 %cmp17.2 = icmp sle i32 %conv20.1, %conv16.2
45 %conv18.2 = zext i1 %cmp17.2 to i32
46 %a.2 = add nsw i32 %a.1, %conv18.2
47 %1 = zext i8 %0 to i32
48 %conv16.158 = sext i8 %b to i32
49 %cmp17.159 = icmp sle i32 %1, %conv16.158
50 %conv18.160 = zext i1 %cmp17.159 to i32
51 %a.161 = add nsw i32 %a.2, %conv18.160
52 %dec19.162 = add i8 %c, -4
53 %conv20.163 = zext i8 %dec19.162 to i32
54 %conv16.1.1 = sext i8 %b to i32
55 %cmp17.1.1 = icmp sle i32 %conv20.163, %conv16.1.1
56 %conv18.1.1 = zext i1 %cmp17.1.1 to i32
57 %a.1.1 = add nsw i32 %a.161, %conv18.1.1