1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2 ; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=alderlake < %s| FileCheck %s
; @test: four scalar lanes. Each lane sign-extends an i32 value (%mul, %mul7,
; %mul15, %mul23) to i64, applies `ashr` by 0, truncates back to i32 and stores
; the result to elements 0..3 of the [4 x i32] at index 8 from a null base.
; %mul7, %mul15 and %mul23 are the product of the two i32 loads of their lane:
; one load comes from i32 offsets 33, 7 and 0, the other from offsets 62, 61
; and 60. Lane 0 loads from offsets 1 and 63.
; The CHECK lines expect a masked gather for offsets <1, 33, 7, 0>, a single
; <4 x i32> load at offset 60 reversed by a shufflevector, and the mul/ashr
; performed directly on <4 x i32> before one vector store.
5 ; CHECK-LABEL: define void @test(
6 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
8 ; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr i32, ptr null, i64 60
9 ; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> <i64 1, i64 33, i64 7, i64 0>), i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
10 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4
11 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
12 ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]]
13 ; CHECK-NEXT: [[TMP4:%.*]] = ashr <4 x i32> [[TMP3]], zeroinitializer
14 ; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
15 ; CHECK-NEXT: ret void
; Lane 0: loads from offsets 1 and 63, result stored to element 0.
18 %arrayidx1 = getelementptr i32, ptr null, i64 1
19 %0 = load i32, ptr %arrayidx1, align 4
20 %arrayidx2 = getelementptr i32, ptr null, i64 63
21 %1 = load i32, ptr %arrayidx2, align 4
23 %conv = sext i32 %mul to i64
24 %shr = ashr i64 %conv, 0
25 %conv3 = trunc i64 %shr to i32
26 store i32 %conv3, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
; Lane 1: loads from offsets 33 and 62, result stored to element 1.
27 %arrayidx5 = getelementptr i32, ptr null, i64 33
28 %2 = load i32, ptr %arrayidx5, align 4
29 %arrayidx6 = getelementptr i32, ptr null, i64 62
30 %3 = load i32, ptr %arrayidx6, align 4
31 %mul7 = mul i32 %3, %2
32 %conv8 = sext i32 %mul7 to i64
33 %shr10 = ashr i64 %conv8, 0
34 %conv11 = trunc i64 %shr10 to i32
35 store i32 %conv11, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 1), align 4
; Lane 2: loads from offsets 7 and 61, result stored to element 2.
36 %arrayidx13 = getelementptr i32, ptr null, i64 7
37 %4 = load i32, ptr %arrayidx13, align 4
38 %arrayidx14 = getelementptr i32, ptr null, i64 61
39 %5 = load i32, ptr %arrayidx14, align 4
40 %mul15 = mul i32 %5, %4
41 %conv16 = sext i32 %mul15 to i64
42 %shr18 = ashr i64 %conv16, 0
43 %conv19 = trunc i64 %shr18 to i32
44 store i32 %conv19, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 2), align 8
; Lane 3: loads from offset 0 (ptr null itself) and offset 60, result stored to element 3.
45 %6 = load i32, ptr null, align 4
46 %arrayidx22 = getelementptr i32, ptr null, i64 60
47 %7 = load i32, ptr %arrayidx22, align 4
48 %mul23 = mul i32 %7, %6
49 %conv24 = sext i32 %mul23 to i64
50 %shr26 = ashr i64 %conv24, 0
51 %conv27 = trunc i64 %shr26 to i32
52 store i32 %conv27, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 3), align 4
; @test1: same four-lane layout as the `ashr` variant of this test, but each
; lane uses `lshr` by 0 between the sext to i64 and the trunc back to i32.
; Loads use i32 offsets 1/63, 33/62, 7/61 and 0/60 from a null base; results
; are stored to elements 0..3 of the [4 x i32] at index 8.
; The CHECK lines expect the same masked gather + reversed <4 x i32> load +
; vector mul, and here the sext, lshr and trunc remain as <4 x i64> operations
; before the single vector store.
56 define void @test1() {
57 ; CHECK-LABEL: define void @test1(
58 ; CHECK-SAME: ) #[[ATTR0]] {
60 ; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr i32, ptr null, i64 60
61 ; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> <i64 1, i64 33, i64 7, i64 0>), i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
62 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4
63 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
64 ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]]
65 ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i32> [[TMP3]] to <4 x i64>
66 ; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[TMP4]], zeroinitializer
67 ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i32>
68 ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
69 ; CHECK-NEXT: ret void
; Lane 0: loads from offsets 1 and 63, result stored to element 0.
72 %arrayidx1 = getelementptr i32, ptr null, i64 1
73 %0 = load i32, ptr %arrayidx1, align 4
74 %arrayidx2 = getelementptr i32, ptr null, i64 63
75 %1 = load i32, ptr %arrayidx2, align 4
77 %conv = sext i32 %mul to i64
78 %shr = lshr i64 %conv, 0
79 %conv3 = trunc i64 %shr to i32
80 store i32 %conv3, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
; Lane 1: loads from offsets 33 and 62, result stored to element 1.
81 %arrayidx5 = getelementptr i32, ptr null, i64 33
82 %2 = load i32, ptr %arrayidx5, align 4
83 %arrayidx6 = getelementptr i32, ptr null, i64 62
84 %3 = load i32, ptr %arrayidx6, align 4
85 %mul7 = mul i32 %3, %2
86 %conv8 = sext i32 %mul7 to i64
87 %shr10 = lshr i64 %conv8, 0
88 %conv11 = trunc i64 %shr10 to i32
89 store i32 %conv11, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 1), align 4
; Lane 2: loads from offsets 7 and 61, result stored to element 2.
90 %arrayidx13 = getelementptr i32, ptr null, i64 7
91 %4 = load i32, ptr %arrayidx13, align 4
92 %arrayidx14 = getelementptr i32, ptr null, i64 61
93 %5 = load i32, ptr %arrayidx14, align 4
94 %mul15 = mul i32 %5, %4
95 %conv16 = sext i32 %mul15 to i64
96 %shr18 = lshr i64 %conv16, 0
97 %conv19 = trunc i64 %shr18 to i32
98 store i32 %conv19, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 2), align 8
; Lane 3: loads from offset 0 (ptr null itself) and offset 60, result stored to element 3.
99 %6 = load i32, ptr null, align 4
100 %arrayidx22 = getelementptr i32, ptr null, i64 60
101 %7 = load i32, ptr %arrayidx22, align 4
102 %mul23 = mul i32 %7, %6
103 %conv24 = sext i32 %mul23 to i64
104 %shr26 = lshr i64 %conv24, 0
105 %conv27 = trunc i64 %shr26 to i32
106 store i32 %conv27, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 3), align 4
; @test_div: four scalar lanes. Each lane multiplies two i32 loads, zero-extends
; the product to i64, divides it with `udiv` by a constant (1, 2, 1, 2 for
; lanes 0..3), truncates back to i32 and stores to elements 0..3 of the
; [4 x i32] at index 8 from a null base.
; Loads use i32 offsets 1/63, 33/62, 7/61 and 0/60.
; The CHECK lines expect a masked gather for offsets <1, 33, 7, 0>, a single
; <4 x i32> load at offset 60 reversed by a shufflevector, a vector mul, and a
; `udiv <4 x i32>` by <1, 2, 1, 2> applied directly to the mul result (no
; zext/trunc between them) before one vector store.
110 define void @test_div() {
111 ; CHECK-LABEL: define void @test_div(
112 ; CHECK-SAME: ) #[[ATTR0]] {
114 ; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr i32, ptr null, i64 60
115 ; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> <i64 1, i64 33, i64 7, i64 0>), i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
116 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4
117 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
118 ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]]
119 ; CHECK-NEXT: [[TMP6:%.*]] = udiv <4 x i32> [[TMP3]], <i32 1, i32 2, i32 1, i32 2>
120 ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
121 ; CHECK-NEXT: ret void
; Lane 0: offsets 1 and 63, divisor 1, result stored to element 0.
124 %arrayidx1 = getelementptr i32, ptr null, i64 1
125 %0 = load i32, ptr %arrayidx1, align 4
126 %arrayidx2 = getelementptr i32, ptr null, i64 63
127 %1 = load i32, ptr %arrayidx2, align 4
128 %mul = mul i32 %1, %0
129 %conv = zext i32 %mul to i64
130 %shr = udiv i64 %conv, 1
131 %conv3 = trunc i64 %shr to i32
132 store i32 %conv3, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
; Lane 1: offsets 33 and 62, divisor 2, result stored to element 1.
133 %arrayidx5 = getelementptr i32, ptr null, i64 33
134 %2 = load i32, ptr %arrayidx5, align 4
135 %arrayidx6 = getelementptr i32, ptr null, i64 62
136 %3 = load i32, ptr %arrayidx6, align 4
137 %mul7 = mul i32 %3, %2
138 %conv8 = zext i32 %mul7 to i64
139 %shr10 = udiv i64 %conv8, 2
140 %conv11 = trunc i64 %shr10 to i32
141 store i32 %conv11, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 1), align 4
; Lane 2: offsets 7 and 61, divisor 1, result stored to element 2.
142 %arrayidx13 = getelementptr i32, ptr null, i64 7
143 %4 = load i32, ptr %arrayidx13, align 4
144 %arrayidx14 = getelementptr i32, ptr null, i64 61
145 %5 = load i32, ptr %arrayidx14, align 4
146 %mul15 = mul i32 %5, %4
147 %conv16 = zext i32 %mul15 to i64
148 %shr18 = udiv i64 %conv16, 1
149 %conv19 = trunc i64 %shr18 to i32
150 store i32 %conv19, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 2), align 8
; Lane 3: offset 0 (ptr null itself) and offset 60, divisor 2, result stored to element 3.
151 %6 = load i32, ptr null, align 4
152 %arrayidx22 = getelementptr i32, ptr null, i64 60
153 %7 = load i32, ptr %arrayidx22, align 4
154 %mul23 = mul i32 %7, %6
155 %conv24 = zext i32 %mul23 to i64
156 %shr26 = udiv i64 %conv24, 2
157 %conv27 = trunc i64 %shr26 to i32
158 store i32 %conv27, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 3), align 4
; @test_rem: four scalar lanes. Each lane multiplies two i32 loads, zero-extends
; the product to i64, takes `urem` by a constant (1, 2, 1, 1 for lanes 0..3),
; truncates back to i32 and stores to elements 0..3 of the [4 x i32] at
; index 8 from a null base.
; Loads use i32 offsets 1/63, 33/62, 7/61 and 0/60.
; The CHECK lines expect a masked gather for offsets <1, 33, 7, 0>, a single
; <4 x i32> load at offset 60 reversed by a shufflevector, a vector mul, and a
; `urem <4 x i32>` by <1, 2, 1, 1> applied directly to the mul result (no
; zext/trunc between them) before one vector store.
162 define void @test_rem() {
163 ; CHECK-LABEL: define void @test_rem(
164 ; CHECK-SAME: ) #[[ATTR0]] {
166 ; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr i32, ptr null, i64 60
167 ; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> <i64 1, i64 33, i64 7, i64 0>), i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
168 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4
169 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
170 ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]]
171 ; CHECK-NEXT: [[TMP6:%.*]] = urem <4 x i32> [[TMP3]], <i32 1, i32 2, i32 1, i32 1>
172 ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
173 ; CHECK-NEXT: ret void
; Lane 0: offsets 1 and 63, modulus 1, result stored to element 0.
176 %arrayidx1 = getelementptr i32, ptr null, i64 1
177 %0 = load i32, ptr %arrayidx1, align 4
178 %arrayidx2 = getelementptr i32, ptr null, i64 63
179 %1 = load i32, ptr %arrayidx2, align 4
180 %mul = mul i32 %1, %0
181 %conv = zext i32 %mul to i64
182 %shr = urem i64 %conv, 1
183 %conv3 = trunc i64 %shr to i32
184 store i32 %conv3, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
; Lane 1: offsets 33 and 62, modulus 2, result stored to element 1.
185 %arrayidx5 = getelementptr i32, ptr null, i64 33
186 %2 = load i32, ptr %arrayidx5, align 4
187 %arrayidx6 = getelementptr i32, ptr null, i64 62
188 %3 = load i32, ptr %arrayidx6, align 4
189 %mul7 = mul i32 %3, %2
190 %conv8 = zext i32 %mul7 to i64
191 %shr10 = urem i64 %conv8, 2
192 %conv11 = trunc i64 %shr10 to i32
193 store i32 %conv11, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 1), align 4
; Lane 2: offsets 7 and 61, modulus 1, result stored to element 2.
194 %arrayidx13 = getelementptr i32, ptr null, i64 7
195 %4 = load i32, ptr %arrayidx13, align 4
196 %arrayidx14 = getelementptr i32, ptr null, i64 61
197 %5 = load i32, ptr %arrayidx14, align 4
198 %mul15 = mul i32 %5, %4
199 %conv16 = zext i32 %mul15 to i64
200 %shr18 = urem i64 %conv16, 1
201 %conv19 = trunc i64 %shr18 to i32
202 store i32 %conv19, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 2), align 8
; Lane 3: offset 0 (ptr null itself) and offset 60, modulus 1, result stored to element 3.
203 %6 = load i32, ptr null, align 4
204 %arrayidx22 = getelementptr i32, ptr null, i64 60
205 %7 = load i32, ptr %arrayidx22, align 4
206 %mul23 = mul i32 %7, %6
207 %conv24 = zext i32 %mul23 to i64
208 %shr26 = urem i64 %conv24, 1
209 %conv27 = trunc i64 %shr26 to i32
210 store i32 %conv27, ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 3), align 4