; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -o - -S -passes=load-store-vectorizer,dce %s | FileCheck %s

; Make sure LoadStoreVectorizer vectorizes the loads in @ld_v4i8_add_nsw.
; To prove that the vectorization is safe, the pass tries to match nested
; adds and find an expression that adds a constant value to an existing
; index, where that addition is known not to overflow.

target triple = "x86_64--"
define void @ld_v4i8_add_nsw(i32 %v0, i32 %v1, ptr %src, ptr %dst) {
; CHECK-LABEL: @ld_v4i8_add_nsw(
; CHECK-NEXT:    [[TMP:%.*]] = add nsw i32 [[V0:%.*]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[V1:%.*]], [[TMP]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP2]]
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP82:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1
; CHECK-NEXT:    [[TMP133:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2
; CHECK-NEXT:    [[TMP184:%.*]] = extractelement <4 x i8> [[TMP1]], i32 3
; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i8> poison, i8 [[TMP41]], i32 0
; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP82]], i32 1
; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP133]], i32 2
; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP184]], i32 3
; CHECK-NEXT:    store <4 x i8> [[TMP22]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
  ; Lane 0: byte at %src + sext(%v1 + (%v0 - 1)); both adds carry nsw.
  %tmp = add nsw i32 %v0, -1
  %tmp1 = add nsw i32 %v1, %tmp
  %tmp2 = sext i32 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2
  %tmp4 = load i8, ptr %tmp3, align 1

  ; Lane 1: byte at %src + sext(%v1 + %v0).
  %tmp5 = add nsw i32 %v1, %v0
  %tmp6 = sext i32 %tmp5 to i64
  %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6
  %tmp8 = load i8, ptr %tmp7, align 1

  ; Lane 2: byte at %src + sext(%v1 + (%v0 + 1)).
  %tmp9 = add nsw i32 %v0, 1
  %tmp10 = add nsw i32 %v1, %tmp9
  %tmp11 = sext i32 %tmp10 to i64
  %tmp12 = getelementptr inbounds i8, ptr %src, i64 %tmp11
  %tmp13 = load i8, ptr %tmp12, align 1

  ; Lane 3: byte at %src + sext(%v1 + (%v0 + 2)).
  %tmp14 = add nsw i32 %v0, 2
  %tmp15 = add nsw i32 %v1, %tmp14
  %tmp16 = sext i32 %tmp15 to i64
  %tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16
  %tmp18 = load i8, ptr %tmp17, align 1

  ; Pack the four loaded bytes into one <4 x i8> and store it to %dst.
  %tmp19 = insertelement <4 x i8> poison, i8 %tmp4, i32 0
  %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1
  %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2
  %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3
  store <4 x i8> %tmp22, ptr %dst
  ret void
}
; Make sure we don't vectorize the loads below: the adds that feed the sext
; instructions don't have the nsw flag, so the indices cannot be proven to be
; consecutive.

define void @ld_v4i8_add_not_safe(i32 %v0, i32 %v1, ptr %src, ptr %dst) {
; CHECK-LABEL: @ld_v4i8_add_not_safe(
; CHECK-NEXT:    [[TMP:%.*]] = add nsw i32 [[V0:%.*]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[V1:%.*]], [[TMP]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[V1]], [[V0]]
; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 1
; CHECK-NEXT:    [[TMP9:%.*]] = add nsw i32 [[V0]], 1
; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[V1]], [[TMP9]]
; CHECK-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1
; CHECK-NEXT:    [[TMP14:%.*]] = add nsw i32 [[V0]], 2
; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[V1]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP16]]
; CHECK-NEXT:    [[TMP18:%.*]] = load i8, ptr [[TMP17]], align 1
; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i8> poison, i8 [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP8]], i32 1
; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP13]], i32 2
; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP18]], i32 3
; CHECK-NEXT:    store <4 x i8> [[TMP22]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
  ; Lane 0: byte at %src + sext(%v1 + (%v0 - 1)); the add with %v1 has no nsw.
  %tmp = add nsw i32 %v0, -1
  %tmp1 = add i32 %v1, %tmp
  %tmp2 = sext i32 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2
  %tmp4 = load i8, ptr %tmp3, align 1

  ; Lane 1: byte at %src + sext(%v1 + %v0); no nsw on the add.
  %tmp5 = add i32 %v1, %v0
  %tmp6 = sext i32 %tmp5 to i64
  %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6
  %tmp8 = load i8, ptr %tmp7, align 1

  ; Lane 2: byte at %src + sext(%v1 + (%v0 + 1)); the add with %v1 has no nsw.
  %tmp9 = add nsw i32 %v0, 1
  %tmp10 = add i32 %v1, %tmp9
  %tmp11 = sext i32 %tmp10 to i64
  %tmp12 = getelementptr inbounds i8, ptr %src, i64 %tmp11
  %tmp13 = load i8, ptr %tmp12, align 1

  ; Lane 3: byte at %src + sext(%v1 + (%v0 + 2)); the add with %v1 has no nsw.
  %tmp14 = add nsw i32 %v0, 2
  %tmp15 = add i32 %v1, %tmp14
  %tmp16 = sext i32 %tmp15 to i64
  %tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16
  %tmp18 = load i8, ptr %tmp17, align 1

  ; Pack the four loaded bytes into one <4 x i8> and store it to %dst.
  %tmp19 = insertelement <4 x i8> poison, i8 %tmp4, i32 0
  %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1
  %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2
  %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3
  store <4 x i8> %tmp22, ptr %dst
  ret void
}