1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v \
3 ; RUN: -riscv-v-slp-max-vf=0 -S | FileCheck %s
5 ; This should not be vectorized, as the cost of computing the offsets nullifies
6 ; the benefits of vectorizing:
8 ; copy_with_offset_v2i8:
10 ; vsetivli zero, 2, e8, mf8, ta, ma
16 ; Compared to the scalar version where the offsets can be folded into the
19 ; copy_with_offset_v2i8:
26 define void @copy_with_offset_v2i8(ptr noalias %p, ptr noalias %q) {
27 ; CHECK-LABEL: @copy_with_offset_v2i8(
29 ; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 8
30 ; CHECK-NEXT: [[X1:%.*]] = load i8, ptr [[P1]], align 1
31 ; CHECK-NEXT: [[Q1:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 16
32 ; CHECK-NEXT: store i8 [[X1]], ptr [[Q1]], align 1
33 ; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 9
34 ; CHECK-NEXT: [[X2:%.*]] = load i8, ptr [[P2]], align 1
35 ; CHECK-NEXT: [[Q2:%.*]] = getelementptr i8, ptr [[Q]], i32 17
36 ; CHECK-NEXT: store i8 [[X2]], ptr [[Q2]], align 1
37 ; CHECK-NEXT: ret void
; The CHECK lines above expect only scalar i8 loads and stores in the output:
; no <2 x i8> operation may appear, i.e. this function stays unvectorized.
;
; Element 0: load the byte at %p + 8 and form the destination address %q + 16.
40 %p1 = getelementptr i8, ptr %p, i32 8
41 %x1 = load i8, ptr %p1
42 %q1 = getelementptr i8, ptr %q, i32 16
; Element 1: load the byte at %p + 9 and form the destination address %q + 17
; (both addresses are adjacent to those of element 0).
45 %p2 = getelementptr i8, ptr %p, i32 9
46 %x2 = load i8, ptr %p2
47 %q2 = getelementptr i8, ptr %q, i32 17
53 ; This, on the other hand, should be vectorized as the vector savings outweigh
55 define void @copy_with_offset_v4i8(ptr noalias %p, ptr noalias %q) {
56 ; CHECK-LABEL: @copy_with_offset_v4i8(
58 ; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 8
59 ; CHECK-NEXT: [[Q1:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 16
60 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[P1]], align 1
61 ; CHECK-NEXT: store <4 x i8> [[TMP0]], ptr [[Q1]], align 1
62 ; CHECK-NEXT: ret void
65 %p1 = getelementptr i8, ptr %p, i32 8
66 %x1 = load i8, ptr %p1
67 %q1 = getelementptr i8, ptr %q, i32 16
70 %p2 = getelementptr i8, ptr %p, i32 9
71 %x2 = load i8, ptr %p2
72 %q2 = getelementptr i8, ptr %q, i32 17
75 %p3 = getelementptr i8, ptr %p, i32 10
76 %x3 = load i8, ptr %p3
77 %q3 = getelementptr i8, ptr %q, i32 18
80 %p4 = getelementptr i8, ptr %p, i32 11
81 %x4 = load i8, ptr %p4
82 %q4 = getelementptr i8, ptr %q, i32 19