1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake-avx512 -pass-remarks-output=%t | FileCheck %s
3 ; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
5 ; YAML-LABEL: --- !Passed
6 ; YAML-NEXT: Pass: slp-vectorizer
7 ; YAML-NEXT: Name: VectorizedList
8 ; YAML-NEXT: Function: test
9 ; YAML-NEXT: Args:
10 ; YAML-NEXT: - String: 'SLP vectorized with cost '
11 ; YAML-NEXT: - Cost: '-4'
12 ; YAML-NEXT: - String: ' and with tree size '
13 ; YAML-NEXT: - TreeSize: '4'
14 ; YAML-LABEL: --- !Passed
15 ; YAML-NEXT: Pass: slp-vectorizer
16 ; YAML-NEXT: Name: VectorizedList
17 ; YAML-NEXT: Function: test
18 ; YAML-NEXT: Args:
19 ; YAML-NEXT: - String: 'SLP vectorized with cost '
20 ; YAML-NEXT: - Cost: '-2'
21 ; YAML-NEXT: - String: ' and with tree size '
22 ; YAML-NEXT: - TreeSize: '2'
24 define <4 x float> @test(ptr %x, float %v, float %a) {
25 ; CHECK-LABEL: define <4 x float> @test(
26 ; CHECK-SAME: ptr [[X:%.*]], float [[V:%.*]], float [[A:%.*]]) #[[ATTR0:[0-9]+]] {
27 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[X]], align 4
28 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[A]], i32 0
29 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> zeroinitializer
30 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[V]], i32 0
31 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
32 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 poison, i32 4, i32 5>
33 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
34 ; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP3]], [[TMP7]]
35 ; CHECK-NEXT: ret <4 x float> [[TMP8]]
37 %gep1 = getelementptr inbounds <4 x float>, ptr %x, i64 0, i64 1
38 %x0 = load float, ptr %x, align 4
39 %x1 = load float, ptr %gep1, align 4
40 %add1 = fadd float %a, %v
41 %add2 = fadd float %a, %v
42 %add3 = fadd float %a, %x0
43 %add4 = fadd float %a, %x1
44 %i0 = insertelement <4 x float> undef, float %add1, i32 0
45 %i1 = insertelement <4 x float> %i0, float %add2, i32 1
46 %i2 = insertelement <4 x float> %i1, float %add3, i32 2
47 %i3 = insertelement <4 x float> %i2, float %add4, i32 3