1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -S -passes=slp-vectorizer %s -slp-threshold=-10 | FileCheck %s
; Target configuration for this test: an AArch64 Linux triple and its data layout string.
3 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
4 target triple = "aarch64--linux-gnu"
; %structA wraps a two-element float array. Both test functions address the
; second array element through a getelementptr on their %J argument.
6 %structA = type { [2 x float] }
; @test1: the scalar body computes (xmin - J[0])^2 + (ymin - J[1])^2 using
; fast-math float operations and branches on whether the sum compares ordered-equal
; to 0.0. The autogenerated assertions below expect the two parallel scalar chains
; (sitofp, load, fsub, fmul) to be combined into <2 x float> operations, while the
; final fadd stays scalar and consumes the two extracted lanes in order (lane 0, lane 1).
8 define void @test1(ptr nocapture readonly %J, i32 %xmin, i32 %ymin) {
11 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[XMIN:%.*]], i32 0
12 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[YMIN:%.*]], i32 1
13 ; CHECK-NEXT: br label [[FOR_BODY3_LR_PH:%.*]]
14 ; CHECK: for.body3.lr.ph:
15 ; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
16 ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[J:%.*]], align 4
17 ; CHECK-NEXT: [[TMP5:%.*]] = fsub fast <2 x float> [[TMP2]], [[TMP4]]
18 ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <2 x float> [[TMP5]], [[TMP5]]
19 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0
20 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1
21 ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP7]], [[TMP8]]
22 ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ADD]], 0.000000e+00
23 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY3_LR_PH]], label [[FOR_END27:%.*]]
25 ; CHECK-NEXT: ret void
; Scalar input IR that the vectorizer is run on.
29 br label %for.body3.lr.ph
; Convert both integer arguments to float.
32 %conv5 = sitofp i32 %ymin to float
33 %conv = sitofp i32 %xmin to float
; First lane: xmin minus the float loaded directly from %J.
34 %0 = load float, ptr %J, align 4
35 %sub = fsub fast float %conv, %0
; Second lane: ymin minus the float at array index 1 of %structA's field 0.
36 %arrayidx9 = getelementptr inbounds %structA, ptr %J, i64 0, i32 0, i64 1
37 %1 = load float, ptr %arrayidx9, align 4
38 %sub10 = fsub fast float %conv5, %1
; Square each difference, then sum the squares (first-lane square is the left operand).
39 %mul11 = fmul fast float %sub, %sub
40 %mul12 = fmul fast float %sub10, %sub10
41 %add = fadd fast float %mul11, %mul12
; Branch back to for.body3.lr.ph when the sum equals 0.0, otherwise go to for.end27.
42 %cmp = fcmp oeq float %add, 0.000000e+00
43 br i1 %cmp, label %for.body3.lr.ph, label %for.end27
; @test2: same computation as @test1, (xmin - J[0])^2 + (ymin - J[1])^2, except
; that the operands of the final scalar fadd are swapped in the input IR. The
; autogenerated assertions below expect the same <2 x float> sitofp/load/fsub/fmul
; sequence, with the remaining scalar fadd consuming the extracted lanes in swapped
; order (lane 1, lane 0).
49 define void @test2(ptr nocapture readonly %J, i32 %xmin, i32 %ymin) {
50 ; CHECK-LABEL: @test2(
52 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[XMIN:%.*]], i32 0
53 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[YMIN:%.*]], i32 1
54 ; CHECK-NEXT: br label [[FOR_BODY3_LR_PH:%.*]]
55 ; CHECK: for.body3.lr.ph:
56 ; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
57 ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[J:%.*]], align 4
58 ; CHECK-NEXT: [[TMP5:%.*]] = fsub fast <2 x float> [[TMP2]], [[TMP4]]
59 ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <2 x float> [[TMP5]], [[TMP5]]
60 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0
61 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1
62 ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP8]], [[TMP7]]
63 ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ADD]], 0.000000e+00
64 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY3_LR_PH]], label [[FOR_END27:%.*]]
66 ; CHECK-NEXT: ret void
; Scalar input IR that the vectorizer is run on.
70 br label %for.body3.lr.ph
; Convert both integer arguments to float.
73 %conv5 = sitofp i32 %ymin to float
74 %conv = sitofp i32 %xmin to float
; First lane: xmin minus the float loaded directly from %J.
75 %0 = load float, ptr %J, align 4
76 %sub = fsub fast float %conv, %0
; Second lane: ymin minus the float at array index 1 of %structA's field 0.
77 %arrayidx9 = getelementptr inbounds %structA, ptr %J, i64 0, i32 0, i64 1
78 %1 = load float, ptr %arrayidx9, align 4
79 %sub10 = fsub fast float %conv5, %1
; Square each difference.
80 %mul11 = fmul fast float %sub, %sub
81 %mul12 = fmul fast float %sub10, %sub10
82 %add = fadd fast float %mul12, %mul11 ; <-- operands commuted relative to @test1, which adds %mul11, %mul12
; Branch back to for.body3.lr.ph when the sum equals 0.0, otherwise go to for.end27.
83 %cmp = fcmp oeq float %add, 0.000000e+00
84 br i1 %cmp, label %for.body3.lr.ph, label %for.end27