; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -S | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
; These examples correspond to input code like:
;
;   void t(long * __restrict a, long * __restrict b) {
;     a[0] *= b[0];
;     a[1] *= b[1];
;   }
;
; If we SLP vectorise this then we end up with something like this because we
; don't have a mul.2d:
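;
; Roughly the following shape, where the <2 x i64> multiply has to be done
; lane by lane through the GPRs (illustrative only; exact registers and
; scheduling will vary with the compiler version):
;
;   ldr   q0, [x1]
;   ldr   q1, [x0]
;   fmov  x8, d0
;   mov   x9, v0.d[1]
;   fmov  x10, d1
;   mov   x11, v1.d[1]
;   mul   x8, x10, x8
;   mul   x9, x11, x9
;   fmov  d0, x8
;   mov   v0.d[1], x9
;   str   q0, [x0]
;   ret
;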
; If we don't SLP vectorise but scalarize this we get this instead:
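;
; Roughly (illustrative only; register choices will vary): two paired loads,
; two scalar multiplies, one paired store.
;
;   ldp   x8, x9, [x1]
;   ldp   x10, x11, [x0]
;   mul   x8, x10, x8
;   mul   x9, x11, x9
;   stp   x8, x9, [x0]
;   ret
;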
define void @mul(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
; CHECK-LABEL: @mul(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[B:%.*]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[TMP1]], [[TMP0]]
; CHECK-NEXT:    store i64 [[MUL]], ptr [[A]], align 8
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 1
; CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8
; CHECK-NEXT:    [[MUL4:%.*]] = mul nsw i64 [[TMP3]], [[TMP2]]
; CHECK-NEXT:    store i64 [[MUL4]], ptr [[ARRAYIDX3]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %0 = load i64, ptr %b, align 8
  %1 = load i64, ptr %a, align 8
  %mul = mul nsw i64 %1, %0
  store i64 %mul, ptr %a, align 8
  %arrayidx2 = getelementptr inbounds i64, ptr %b, i64 1
  %2 = load i64, ptr %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds i64, ptr %a, i64 1
  %3 = load i64, ptr %arrayidx3, align 8
  %mul4 = mul nsw i64 %3, %2
  store i64 %mul4, ptr %arrayidx3, align 8
  ret void
}

; Similar example, but now a multiply-accumulate:
;
;   void x (long * __restrict a, long * __restrict b) {
;     a[0] = a[0] * b[0] + b[0];
;     a[1] = a[1] * b[1] + b[1];
;   }
;
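; Keeping this scalar lets the backend fold each mul/add pair into a 64-bit
; multiply-accumulate, e.g. (illustrative only; exact registers will vary):
;
;   ldp   x8, x9, [x1]
;   ldp   x10, x11, [x0]
;   madd  x8, x10, x8, x8
;   madd  x9, x11, x9, x9
;   stp   x8, x9, [x0]
;   ret
;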
define void @mac(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
; CHECK-LABEL: @mac(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[B:%.*]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[TMP1]], [[TMP0]]
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 1
; CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8
; CHECK-NEXT:    [[MUL4:%.*]] = mul nsw i64 [[TMP3]], [[TMP2]]
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[MUL]], [[TMP0]]
; CHECK-NEXT:    store i64 [[ADD]], ptr [[A]], align 8
; CHECK-NEXT:    [[ADD9:%.*]] = add nsw i64 [[MUL4]], [[TMP2]]
; CHECK-NEXT:    store i64 [[ADD9]], ptr [[ARRAYIDX3]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %0 = load i64, ptr %b, align 8
  %1 = load i64, ptr %a, align 8
  %mul = mul nsw i64 %1, %0
  %arrayidx2 = getelementptr inbounds i64, ptr %b, i64 1
  %2 = load i64, ptr %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds i64, ptr %a, i64 1
  %3 = load i64, ptr %arrayidx3, align 8
  %mul4 = mul nsw i64 %3, %2
  %add = add nsw i64 %mul, %0
  store i64 %add, ptr %a, align 8
  %add9 = add nsw i64 %mul4, %2
  store i64 %add9, ptr %arrayidx3, align 8
  ret void
}