1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -S | FileCheck %s
3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
5 declare <4 x float> @ext(<4 x float>)
6 @g = global <4 x float> zeroinitializer
; Scalarization of a loop that carries a <4 x float> accumulator through a phi.
; The checks expect the phi, load, fadd, fcmp, select and store to be split into
; four per-element operations, while the call to @ext still takes a vector that
; is rebuilt with insertelement and whose result is split with extractelement.
8 define void @f1(<4 x float> %init, ptr %base, i32 %count) {
11 ; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT:%.*]], i64 0
12 ; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x float> [[INIT]], i64 1
13 ; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x float> [[INIT]], i64 2
14 ; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x float> [[INIT]], i64 3
15 ; CHECK-NEXT: br label [[LOOP:%.*]]
17 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
18 ; CHECK-NEXT: [[ACC_I0:%.*]] = phi float [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
19 ; CHECK-NEXT: [[ACC_I1:%.*]] = phi float [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
20 ; CHECK-NEXT: [[ACC_I2:%.*]] = phi float [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
21 ; CHECK-NEXT: [[ACC_I3:%.*]] = phi float [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
22 ; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1
23 ; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE:%.*]], i32 [[I]]
24 ; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[PTR]], align 16
25 ; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
26 ; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[PTR_I1]], align 4
27 ; CHECK-NEXT: [[PTR_I2:%.*]] = getelementptr float, ptr [[PTR]], i32 2
28 ; CHECK-NEXT: [[VAL_I2:%.*]] = load float, ptr [[PTR_I2]], align 8
29 ; CHECK-NEXT: [[PTR_I3:%.*]] = getelementptr float, ptr [[PTR]], i32 3
30 ; CHECK-NEXT: [[VAL_I3:%.*]] = load float, ptr [[PTR_I3]], align 4
31 ; CHECK-NEXT: [[ADD_I0:%.*]] = fadd float [[VAL_I0]], [[VAL_I2]]
32 ; CHECK-NEXT: [[ADD_I1:%.*]] = fadd float [[VAL_I1]], [[VAL_I3]]
33 ; CHECK-NEXT: [[ADD_I2:%.*]] = fadd float [[ACC_I0]], [[ACC_I2]]
34 ; CHECK-NEXT: [[ADD_I3:%.*]] = fadd float [[ACC_I1]], [[ACC_I3]]
35 ; CHECK-NEXT: [[ADD_UPTO0:%.*]] = insertelement <4 x float> poison, float [[ADD_I0]], i64 0
36 ; CHECK-NEXT: [[ADD_UPTO1:%.*]] = insertelement <4 x float> [[ADD_UPTO0]], float [[ADD_I1]], i64 1
37 ; CHECK-NEXT: [[ADD_UPTO2:%.*]] = insertelement <4 x float> [[ADD_UPTO1]], float [[ADD_I2]], i64 2
38 ; CHECK-NEXT: [[ADD:%.*]] = insertelement <4 x float> [[ADD_UPTO2]], float [[ADD_I3]], i64 3
39 ; CHECK-NEXT: [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[ADD]])
40 ; CHECK-NEXT: [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i64 0
41 ; CHECK-NEXT: [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00
42 ; CHECK-NEXT: [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i64 1
43 ; CHECK-NEXT: [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00
44 ; CHECK-NEXT: [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i64 2
45 ; CHECK-NEXT: [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00
46 ; CHECK-NEXT: [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i64 3
47 ; CHECK-NEXT: [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00
48 ; CHECK-NEXT: [[SEL_I0]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00
49 ; CHECK-NEXT: [[SEL_I1]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00
50 ; CHECK-NEXT: [[SEL_I2]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00
51 ; CHECK-NEXT: [[SEL_I3]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00
52 ; CHECK-NEXT: store float [[SEL_I0]], ptr [[PTR]], align 16
53 ; CHECK-NEXT: store float [[SEL_I1]], ptr [[PTR_I1]], align 4
54 ; CHECK-NEXT: store float [[SEL_I2]], ptr [[PTR_I2]], align 8
55 ; CHECK-NEXT: store float [[SEL_I3]], ptr [[PTR_I3]], align 4
56 ; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
57 ; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
59 ; CHECK-NEXT: ret void
65 %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
66 %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
67 %nexti = sub i32 %i, 1
69 %ptr = getelementptr <4 x float>, ptr %base, i32 %i
70 %val = load <4 x float> , ptr %ptr
; Reinterpret both float vectors as <2 x double>, so that each shuffle lane
; below moves a pair of adjacent floats.
71 %dval = bitcast <4 x float> %val to <2 x double>
72 %dacc = bitcast <4 x float> %acc to <2 x double>
; shuffle1 = <dval[0], dacc[0]> and shuffle2 = <dval[1], dacc[1]>.
73 %shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc,
74 <2 x i32> <i32 0, i32 2>
75 %shuffle2 = shufflevector <2 x double> %dval, <2 x double> %dacc,
76 <2 x i32> <i32 1, i32 3>
; Back in float lanes: %f1 = <val0, val1, acc0, acc1> and
; %f2 = <val2, val3, acc2, acc3>, which is why the checks expect
; val0+val2, val1+val3, acc0+acc2 and acc1+acc3.
77 %f1 = bitcast <2 x double> %shuffle1 to <4 x float>
78 %f2 = bitcast <2 x double> %shuffle2 to <4 x float>
79 %add = fadd <4 x float> %f1, %f2
80 %call = call <4 x float> @ext(<4 x float> %add)
81 %cmp = fcmp ogt <4 x float> %call,
82 <float 1.0, float 2.0, float 3.0, float 4.0>
83 %sel = select <4 x i1> %cmp, <4 x float> %call,
84 <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
85 store <4 x float> %sel, ptr %ptr
87 %test = icmp eq i32 %nexti, 0
88 br i1 %test, label %loop, label %exit
; Scalarization of an integer loop: a <4 x i8> load is sign-extended, added to a
; loop-carried <4 x i32> accumulator, compared, selected against a splat of %i,
; truncated and stored back. The checks expect every one of these to become four
; scalar operations, with the splat replaced by direct uses of the scalar %i.
94 define void @f2(<4 x i32> %init, ptr %base, i32 %count) {
97 ; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT:%.*]], i64 0
98 ; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x i32> [[INIT]], i64 1
99 ; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x i32> [[INIT]], i64 2
100 ; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x i32> [[INIT]], i64 3
101 ; CHECK-NEXT: br label [[LOOP:%.*]]
103 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
104 ; CHECK-NEXT: [[ACC_I0:%.*]] = phi i32 [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
105 ; CHECK-NEXT: [[ACC_I1:%.*]] = phi i32 [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
106 ; CHECK-NEXT: [[ACC_I2:%.*]] = phi i32 [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
107 ; CHECK-NEXT: [[ACC_I3:%.*]] = phi i32 [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
108 ; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1
109 ; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x i8>, ptr [[BASE:%.*]], i32 [[I]]
110 ; CHECK-NEXT: [[VAL_I0:%.*]] = load i8, ptr [[PTR]], align 4
111 ; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr i8, ptr [[PTR]], i32 1
112 ; CHECK-NEXT: [[VAL_I1:%.*]] = load i8, ptr [[PTR_I1]], align 1
113 ; CHECK-NEXT: [[PTR_I2:%.*]] = getelementptr i8, ptr [[PTR]], i32 2
114 ; CHECK-NEXT: [[VAL_I2:%.*]] = load i8, ptr [[PTR_I2]], align 2
115 ; CHECK-NEXT: [[PTR_I3:%.*]] = getelementptr i8, ptr [[PTR]], i32 3
116 ; CHECK-NEXT: [[VAL_I3:%.*]] = load i8, ptr [[PTR_I3]], align 1
117 ; CHECK-NEXT: [[EXT_I0:%.*]] = sext i8 [[VAL_I0]] to i32
118 ; CHECK-NEXT: [[EXT_I1:%.*]] = sext i8 [[VAL_I1]] to i32
119 ; CHECK-NEXT: [[EXT_I2:%.*]] = sext i8 [[VAL_I2]] to i32
120 ; CHECK-NEXT: [[EXT_I3:%.*]] = sext i8 [[VAL_I3]] to i32
121 ; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[EXT_I0]], [[ACC_I0]]
122 ; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[EXT_I1]], [[ACC_I1]]
123 ; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[EXT_I2]], [[ACC_I2]]
124 ; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[EXT_I3]], [[ACC_I3]]
125 ; CHECK-NEXT: [[CMP_I0:%.*]] = icmp slt i32 [[ADD_I0]], -10
126 ; CHECK-NEXT: [[CMP_I1:%.*]] = icmp slt i32 [[ADD_I1]], -11
127 ; CHECK-NEXT: [[CMP_I2:%.*]] = icmp slt i32 [[ADD_I2]], -12
128 ; CHECK-NEXT: [[CMP_I3:%.*]] = icmp slt i32 [[ADD_I3]], -13
129 ; CHECK-NEXT: [[SEL_I0]] = select i1 [[CMP_I0]], i32 [[ADD_I0]], i32 [[I]]
130 ; CHECK-NEXT: [[SEL_I1]] = select i1 [[CMP_I1]], i32 [[ADD_I1]], i32 [[I]]
131 ; CHECK-NEXT: [[SEL_I2]] = select i1 [[CMP_I2]], i32 [[ADD_I2]], i32 [[I]]
132 ; CHECK-NEXT: [[SEL_I3]] = select i1 [[CMP_I3]], i32 [[ADD_I3]], i32 [[I]]
133 ; CHECK-NEXT: [[TRUNC_I0:%.*]] = trunc i32 [[SEL_I0]] to i8
134 ; CHECK-NEXT: [[TRUNC_I1:%.*]] = trunc i32 [[SEL_I1]] to i8
135 ; CHECK-NEXT: [[TRUNC_I2:%.*]] = trunc i32 [[SEL_I2]] to i8
136 ; CHECK-NEXT: [[TRUNC_I3:%.*]] = trunc i32 [[SEL_I3]] to i8
137 ; CHECK-NEXT: store i8 [[TRUNC_I0]], ptr [[PTR]], align 4
138 ; CHECK-NEXT: store i8 [[TRUNC_I1]], ptr [[PTR_I1]], align 1
139 ; CHECK-NEXT: store i8 [[TRUNC_I2]], ptr [[PTR_I2]], align 2
140 ; CHECK-NEXT: store i8 [[TRUNC_I3]], ptr [[PTR_I3]], align 1
141 ; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
142 ; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
144 ; CHECK-NEXT: ret void
150 %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
151 %acc = phi <4 x i32> [ %init, %entry ], [ %sel, %loop ]
152 %nexti = sub i32 %i, 1
154 %ptr = getelementptr <4 x i8>, ptr %base, i32 %i
155 %val = load <4 x i8> , ptr %ptr
156 %ext = sext <4 x i8> %val to <4 x i32>
157 %add = add <4 x i32> %ext, %acc
158 %cmp = icmp slt <4 x i32> %add, <i32 -10, i32 -11, i32 -12, i32 -13>
; Splat %i across all four lanes: insert it into lane 0, then broadcast lane 0
; with an all-zero shuffle mask.
159 %single = insertelement <4 x i32> undef, i32 %i, i32 0
160 %limit = shufflevector <4 x i32> %single, <4 x i32> undef,
161 <4 x i32> zeroinitializer
162 %sel = select <4 x i1> %cmp, <4 x i32> %add, <4 x i32> %limit
163 %trunc = trunc <4 x i32> %sel to <4 x i8>
164 store <4 x i8> %trunc, ptr %ptr
166 %test = icmp eq i32 %nexti, 0
167 br i1 %test, label %loop, label %exit
173 ; Check that !tbaa metadata is preserved on every scalarized load and store.
174 define void @f3(ptr %src, ptr %dst) {
176 ; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1
177 ; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
178 ; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3
179 ; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16, !tbaa [[TBAA0:![0-9]+]]
180 ; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1
181 ; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4, !tbaa [[TBAA0]]
182 ; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2
183 ; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, ptr [[SRC_I2]], align 8, !tbaa [[TBAA0]]
184 ; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr i32, ptr [[SRC]], i32 3
185 ; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, ptr [[SRC_I3]], align 4, !tbaa [[TBAA0]]
186 ; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
187 ; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
188 ; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
189 ; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
190 ; CHECK-NEXT: store i32 [[ADD_I0]], ptr [[DST]], align 16, !tbaa [[TBAA3:![0-9]+]]
191 ; CHECK-NEXT: store i32 [[ADD_I1]], ptr [[DST_I1]], align 4, !tbaa [[TBAA3]]
192 ; CHECK-NEXT: store i32 [[ADD_I2]], ptr [[DST_I2]], align 8, !tbaa [[TBAA3]]
193 ; CHECK-NEXT: store i32 [[ADD_I3]], ptr [[DST_I3]], align 4, !tbaa [[TBAA3]]
194 ; CHECK-NEXT: ret void
196 %val = load <4 x i32> , ptr %src, !tbaa !1
197 %add = add <4 x i32> %val, %val
198 store <4 x i32> %add, ptr %dst, !tbaa !2
202 ; Check that !tbaa.struct metadata is preserved on every scalarized load and store.
203 define void @f4(ptr %src, ptr %dst) {
205 ; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1
206 ; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
207 ; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3
208 ; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]]
209 ; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1
210 ; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4, !tbaa.struct [[TBAA_STRUCT5]]
211 ; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2
212 ; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, ptr [[SRC_I2]], align 8, !tbaa.struct [[TBAA_STRUCT5]]
213 ; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr i32, ptr [[SRC]], i32 3
214 ; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, ptr [[SRC_I3]], align 4, !tbaa.struct [[TBAA_STRUCT5]]
215 ; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
216 ; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
217 ; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
218 ; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
219 ; CHECK-NEXT: store i32 [[ADD_I0]], ptr [[DST]], align 16, !tbaa.struct [[TBAA_STRUCT5]]
220 ; CHECK-NEXT: store i32 [[ADD_I1]], ptr [[DST_I1]], align 4, !tbaa.struct [[TBAA_STRUCT5]]
221 ; CHECK-NEXT: store i32 [[ADD_I2]], ptr [[DST_I2]], align 8, !tbaa.struct [[TBAA_STRUCT5]]
222 ; CHECK-NEXT: store i32 [[ADD_I3]], ptr [[DST_I3]], align 4, !tbaa.struct [[TBAA_STRUCT5]]
223 ; CHECK-NEXT: ret void
225 %val = load <4 x i32> , ptr %src, !tbaa.struct !5
226 %add = add <4 x i32> %val, %val
227 store <4 x i32> %add, ptr %dst, !tbaa.struct !5
231 ; Check that !llvm.access.group metadata is preserved on every scalarized load
; and store, and that !llvm.loop stays attached to the loop branch.
232 define void @f5(i32 %count, ptr %src, ptr %dst) {
235 ; CHECK-NEXT: br label [[LOOP:%.*]]
237 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT_INDEX:%.*]], [[LOOP]] ]
238 ; CHECK-NEXT: [[THIS_SRC:%.*]] = getelementptr <4 x i32>, ptr [[SRC:%.*]], i32 [[INDEX]]
239 ; CHECK-NEXT: [[THIS_SRC_I1:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 1
240 ; CHECK-NEXT: [[THIS_SRC_I2:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 2
241 ; CHECK-NEXT: [[THIS_SRC_I3:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 3
242 ; CHECK-NEXT: [[THIS_DST:%.*]] = getelementptr <4 x i32>, ptr [[DST:%.*]], i32 [[INDEX]]
243 ; CHECK-NEXT: [[THIS_DST_I1:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 1
244 ; CHECK-NEXT: [[THIS_DST_I2:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 2
245 ; CHECK-NEXT: [[THIS_DST_I3:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 3
246 ; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[THIS_SRC]], align 16, !llvm.access.group [[ACC_GRP6:![0-9]+]]
247 ; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[THIS_SRC_I1]], align 4, !llvm.access.group [[ACC_GRP6]]
248 ; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, ptr [[THIS_SRC_I2]], align 8, !llvm.access.group [[ACC_GRP6]]
249 ; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, ptr [[THIS_SRC_I3]], align 4, !llvm.access.group [[ACC_GRP6]]
250 ; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
251 ; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
252 ; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
253 ; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
254 ; CHECK-NEXT: store i32 [[ADD_I0]], ptr [[THIS_DST]], align 16, !llvm.access.group [[ACC_GRP6]]
255 ; CHECK-NEXT: store i32 [[ADD_I1]], ptr [[THIS_DST_I1]], align 4, !llvm.access.group [[ACC_GRP6]]
256 ; CHECK-NEXT: store i32 [[ADD_I2]], ptr [[THIS_DST_I2]], align 8, !llvm.access.group [[ACC_GRP6]]
257 ; CHECK-NEXT: store i32 [[ADD_I3]], ptr [[THIS_DST_I3]], align 4, !llvm.access.group [[ACC_GRP6]]
258 ; CHECK-NEXT: [[NEXT_INDEX]] = add i32 [[INDEX]], -1
259 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[NEXT_INDEX]], [[COUNT:%.*]]
260 ; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[END:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
262 ; CHECK-NEXT: ret void
268 %index = phi i32 [ 0, %entry ], [ %next_index, %loop ]
269 %this_src = getelementptr <4 x i32>, ptr %src, i32 %index
270 %this_dst = getelementptr <4 x i32>, ptr %dst, i32 %index
271 %val = load <4 x i32> , ptr %this_src, !llvm.access.group !13
272 %add = add <4 x i32> %val, %val
273 store <4 x i32> %add, ptr %this_dst, !llvm.access.group !13
274 %next_index = add i32 %index, -1
275 %continue = icmp ne i32 %next_index, %count
276 br i1 %continue, label %loop, label %end, !llvm.loop !3
282 ; Check that !fpmath metadata is preserved on every scalarized fadd.
283 define <4 x float> @f6(<4 x float> %x) {
285 ; CHECK-NEXT: [[X_I0:%.*]] = extractelement <4 x float> [[X:%.*]], i64 0
286 ; CHECK-NEXT: [[RES_I0:%.*]] = fadd float [[X_I0]], 1.000000e+00, !fpmath [[META9:![0-9]+]]
287 ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <4 x float> [[X]], i64 1
288 ; CHECK-NEXT: [[RES_I1:%.*]] = fadd float [[X_I1]], 2.000000e+00, !fpmath [[META9]]
289 ; CHECK-NEXT: [[X_I2:%.*]] = extractelement <4 x float> [[X]], i64 2
290 ; CHECK-NEXT: [[RES_I2:%.*]] = fadd float [[X_I2]], 3.000000e+00, !fpmath [[META9]]
291 ; CHECK-NEXT: [[X_I3:%.*]] = extractelement <4 x float> [[X]], i64 3
292 ; CHECK-NEXT: [[RES_I3:%.*]] = fadd float [[X_I3]], 4.000000e+00, !fpmath [[META9]]
293 ; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <4 x float> poison, float [[RES_I0]], i64 0
294 ; CHECK-NEXT: [[RES_UPTO1:%.*]] = insertelement <4 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1
295 ; CHECK-NEXT: [[RES_UPTO2:%.*]] = insertelement <4 x float> [[RES_UPTO1]], float [[RES_I2]], i64 2
296 ; CHECK-NEXT: [[RES:%.*]] = insertelement <4 x float> [[RES_UPTO2]], float [[RES_I3]], i64 3
297 ; CHECK-NEXT: ret <4 x float> [[RES]]
299 %res = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>,
304 ; Check that unrecognized metadata (here !foo) is dropped from the scalarized loads and stores.
305 define void @f7(ptr %src, ptr %dst) {
307 ; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1
308 ; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
309 ; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3
310 ; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16
311 ; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1
312 ; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4
313 ; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2
314 ; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, ptr [[SRC_I2]], align 8
315 ; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr i32, ptr [[SRC]], i32 3
316 ; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, ptr [[SRC_I3]], align 4
317 ; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
318 ; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
319 ; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
320 ; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
321 ; CHECK-NEXT: store i32 [[ADD_I0]], ptr [[DST]], align 16
322 ; CHECK-NEXT: store i32 [[ADD_I1]], ptr [[DST_I1]], align 4
323 ; CHECK-NEXT: store i32 [[ADD_I2]], ptr [[DST_I2]], align 8
324 ; CHECK-NEXT: store i32 [[ADD_I3]], ptr [[DST_I3]], align 4
325 ; CHECK-NEXT: ret void
327 %val = load <4 x i32> , ptr %src, !foo !5
328 %add = add <4 x i32> %val, %val
329 store <4 x i32> %add, ptr %dst, !foo !5
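333 ; Test scalarization of a GEP whose pointer and index operands are vectors,
; including lanes that were overwritten with insertelement.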
334 define void @f8(ptr %dest, <4 x ptr> %ptr0, <4 x i32> %i0,
336 ; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST:%.*]], i32 1
337 ; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr ptr, ptr [[DEST]], i32 2
338 ; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr ptr, ptr [[DEST]], i32 3
339 ; CHECK-NEXT: [[PTR0_I0:%.*]] = extractelement <4 x ptr> [[PTR0:%.*]], i64 0
340 ; CHECK-NEXT: [[PTR0_I2:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 2
341 ; CHECK-NEXT: [[PTR0_I3:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 3
342 ; CHECK-NEXT: [[I0_I1:%.*]] = extractelement <4 x i32> [[I0:%.*]], i64 1
343 ; CHECK-NEXT: [[I0_I3:%.*]] = extractelement <4 x i32> [[I0]], i64 3
344 ; CHECK-NEXT: [[VAL_I0:%.*]] = getelementptr float, ptr [[PTR0_I0]], i32 100
345 ; CHECK-NEXT: [[VAL_I1:%.*]] = getelementptr float, ptr [[OTHER:%.*]], i32 [[I0_I1]]
346 ; CHECK-NEXT: [[VAL_I2:%.*]] = getelementptr float, ptr [[PTR0_I2]], i32 100
347 ; CHECK-NEXT: [[VAL_I3:%.*]] = getelementptr float, ptr [[PTR0_I3]], i32 [[I0_I3]]
348 ; CHECK-NEXT: store ptr [[VAL_I0]], ptr [[DEST]], align 32
349 ; CHECK-NEXT: store ptr [[VAL_I1]], ptr [[DEST_I1]], align 8
350 ; CHECK-NEXT: store ptr [[VAL_I2]], ptr [[DEST_I2]], align 16
351 ; CHECK-NEXT: store ptr [[VAL_I3]], ptr [[DEST_I3]], align 8
352 ; CHECK-NEXT: ret void
355 %i1 = insertelement <4 x i32> %i0, i32 100, i32 0
356 %i2 = insertelement <4 x i32> %i1, i32 100, i32 2
357 %ptr1 = insertelement <4 x ptr> %ptr0, ptr %other, i32 1
358 %val = getelementptr float, <4 x ptr> %ptr1, <4 x i32> %i2
359 store <4 x ptr> %val, ptr %dest
363 ; Test the handling of under-aligned vector loads and stores (load align 4, store align 8).
364 define void @f9(ptr %dest, ptr %src) {
366 ; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST:%.*]], i32 1
367 ; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float, ptr [[DEST]], i32 2
368 ; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float, ptr [[DEST]], i32 3
369 ; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[SRC:%.*]], align 4
370 ; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr float, ptr [[SRC]], i32 1
371 ; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[SRC_I1]], align 4
372 ; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr float, ptr [[SRC]], i32 2
373 ; CHECK-NEXT: [[VAL_I2:%.*]] = load float, ptr [[SRC_I2]], align 4
374 ; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr float, ptr [[SRC]], i32 3
375 ; CHECK-NEXT: [[VAL_I3:%.*]] = load float, ptr [[SRC_I3]], align 4
376 ; CHECK-NEXT: store float [[VAL_I0]], ptr [[DEST]], align 8
377 ; CHECK-NEXT: store float [[VAL_I1]], ptr [[DEST_I1]], align 4
378 ; CHECK-NEXT: store float [[VAL_I2]], ptr [[DEST_I2]], align 8
379 ; CHECK-NEXT: store float [[VAL_I3]], ptr [[DEST_I3]], align 4
380 ; CHECK-NEXT: ret void
382 %val = load <4 x float> , ptr %src, align 4
383 store <4 x float> %val, ptr %dest, align 8
387 ; ...and again with alignments smaller than the element size (load align 1, store align 2).
388 define void @f10(ptr %dest, ptr %src) {
390 ; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST:%.*]], i32 1
391 ; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float, ptr [[DEST]], i32 2
392 ; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float, ptr [[DEST]], i32 3
393 ; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[SRC:%.*]], align 1
394 ; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr float, ptr [[SRC]], i32 1
395 ; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[SRC_I1]], align 1
396 ; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr float, ptr [[SRC]], i32 2
397 ; CHECK-NEXT: [[VAL_I2:%.*]] = load float, ptr [[SRC_I2]], align 1
398 ; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr float, ptr [[SRC]], i32 3
399 ; CHECK-NEXT: [[VAL_I3:%.*]] = load float, ptr [[SRC_I3]], align 1
400 ; CHECK-NEXT: store float [[VAL_I0]], ptr [[DEST]], align 2
401 ; CHECK-NEXT: store float [[VAL_I1]], ptr [[DEST_I1]], align 2
402 ; CHECK-NEXT: store float [[VAL_I2]], ptr [[DEST_I2]], align 2
403 ; CHECK-NEXT: store float [[VAL_I3]], ptr [[DEST_I3]], align 2
404 ; CHECK-NEXT: ret void
406 %val = load <4 x float> , ptr %src, align 1
407 store <4 x float> %val, ptr %dest, align 2
411 ; Test that loads and stores of vectors with sub-byte elements (<32 x i1>)
; aren't scalarized, while the 'and' between them still is.
412 define void @f11(ptr %dest, ptr %src0) {
414 ; CHECK-NEXT: [[SRC1:%.*]] = getelementptr <32 x i1>, ptr [[SRC0:%.*]], i32 1
415 ; CHECK-NEXT: [[VAL0:%.*]] = load <32 x i1>, ptr [[SRC0]], align 4
416 ; CHECK-NEXT: [[VAL0_I0:%.*]] = extractelement <32 x i1> [[VAL0]], i64 0
417 ; CHECK-NEXT: [[VAL0_I1:%.*]] = extractelement <32 x i1> [[VAL0]], i64 1
418 ; CHECK-NEXT: [[VAL0_I2:%.*]] = extractelement <32 x i1> [[VAL0]], i64 2
419 ; CHECK-NEXT: [[VAL0_I3:%.*]] = extractelement <32 x i1> [[VAL0]], i64 3
420 ; CHECK-NEXT: [[VAL0_I4:%.*]] = extractelement <32 x i1> [[VAL0]], i64 4
421 ; CHECK-NEXT: [[VAL0_I5:%.*]] = extractelement <32 x i1> [[VAL0]], i64 5
422 ; CHECK-NEXT: [[VAL0_I6:%.*]] = extractelement <32 x i1> [[VAL0]], i64 6
423 ; CHECK-NEXT: [[VAL0_I7:%.*]] = extractelement <32 x i1> [[VAL0]], i64 7
424 ; CHECK-NEXT: [[VAL0_I8:%.*]] = extractelement <32 x i1> [[VAL0]], i64 8
425 ; CHECK-NEXT: [[VAL0_I9:%.*]] = extractelement <32 x i1> [[VAL0]], i64 9
426 ; CHECK-NEXT: [[VAL0_I10:%.*]] = extractelement <32 x i1> [[VAL0]], i64 10
427 ; CHECK-NEXT: [[VAL0_I11:%.*]] = extractelement <32 x i1> [[VAL0]], i64 11
428 ; CHECK-NEXT: [[VAL0_I12:%.*]] = extractelement <32 x i1> [[VAL0]], i64 12
429 ; CHECK-NEXT: [[VAL0_I13:%.*]] = extractelement <32 x i1> [[VAL0]], i64 13
430 ; CHECK-NEXT: [[VAL0_I14:%.*]] = extractelement <32 x i1> [[VAL0]], i64 14
431 ; CHECK-NEXT: [[VAL0_I15:%.*]] = extractelement <32 x i1> [[VAL0]], i64 15
432 ; CHECK-NEXT: [[VAL0_I16:%.*]] = extractelement <32 x i1> [[VAL0]], i64 16
433 ; CHECK-NEXT: [[VAL0_I17:%.*]] = extractelement <32 x i1> [[VAL0]], i64 17
434 ; CHECK-NEXT: [[VAL0_I18:%.*]] = extractelement <32 x i1> [[VAL0]], i64 18
435 ; CHECK-NEXT: [[VAL0_I19:%.*]] = extractelement <32 x i1> [[VAL0]], i64 19
436 ; CHECK-NEXT: [[VAL0_I20:%.*]] = extractelement <32 x i1> [[VAL0]], i64 20
437 ; CHECK-NEXT: [[VAL0_I21:%.*]] = extractelement <32 x i1> [[VAL0]], i64 21
438 ; CHECK-NEXT: [[VAL0_I22:%.*]] = extractelement <32 x i1> [[VAL0]], i64 22
439 ; CHECK-NEXT: [[VAL0_I23:%.*]] = extractelement <32 x i1> [[VAL0]], i64 23
440 ; CHECK-NEXT: [[VAL0_I24:%.*]] = extractelement <32 x i1> [[VAL0]], i64 24
441 ; CHECK-NEXT: [[VAL0_I25:%.*]] = extractelement <32 x i1> [[VAL0]], i64 25
442 ; CHECK-NEXT: [[VAL0_I26:%.*]] = extractelement <32 x i1> [[VAL0]], i64 26
443 ; CHECK-NEXT: [[VAL0_I27:%.*]] = extractelement <32 x i1> [[VAL0]], i64 27
444 ; CHECK-NEXT: [[VAL0_I28:%.*]] = extractelement <32 x i1> [[VAL0]], i64 28
445 ; CHECK-NEXT: [[VAL0_I29:%.*]] = extractelement <32 x i1> [[VAL0]], i64 29
446 ; CHECK-NEXT: [[VAL0_I30:%.*]] = extractelement <32 x i1> [[VAL0]], i64 30
447 ; CHECK-NEXT: [[VAL0_I31:%.*]] = extractelement <32 x i1> [[VAL0]], i64 31
448 ; CHECK-NEXT: [[VAL1:%.*]] = load <32 x i1>, ptr [[SRC1]], align 4
449 ; CHECK-NEXT: [[VAL1_I0:%.*]] = extractelement <32 x i1> [[VAL1]], i64 0
450 ; CHECK-NEXT: [[AND_I0:%.*]] = and i1 [[VAL0_I0]], [[VAL1_I0]]
451 ; CHECK-NEXT: [[VAL1_I1:%.*]] = extractelement <32 x i1> [[VAL1]], i64 1
452 ; CHECK-NEXT: [[AND_I1:%.*]] = and i1 [[VAL0_I1]], [[VAL1_I1]]
453 ; CHECK-NEXT: [[VAL1_I2:%.*]] = extractelement <32 x i1> [[VAL1]], i64 2
454 ; CHECK-NEXT: [[AND_I2:%.*]] = and i1 [[VAL0_I2]], [[VAL1_I2]]
455 ; CHECK-NEXT: [[VAL1_I3:%.*]] = extractelement <32 x i1> [[VAL1]], i64 3
456 ; CHECK-NEXT: [[AND_I3:%.*]] = and i1 [[VAL0_I3]], [[VAL1_I3]]
457 ; CHECK-NEXT: [[VAL1_I4:%.*]] = extractelement <32 x i1> [[VAL1]], i64 4
458 ; CHECK-NEXT: [[AND_I4:%.*]] = and i1 [[VAL0_I4]], [[VAL1_I4]]
459 ; CHECK-NEXT: [[VAL1_I5:%.*]] = extractelement <32 x i1> [[VAL1]], i64 5
460 ; CHECK-NEXT: [[AND_I5:%.*]] = and i1 [[VAL0_I5]], [[VAL1_I5]]
461 ; CHECK-NEXT: [[VAL1_I6:%.*]] = extractelement <32 x i1> [[VAL1]], i64 6
462 ; CHECK-NEXT: [[AND_I6:%.*]] = and i1 [[VAL0_I6]], [[VAL1_I6]]
463 ; CHECK-NEXT: [[VAL1_I7:%.*]] = extractelement <32 x i1> [[VAL1]], i64 7
464 ; CHECK-NEXT: [[AND_I7:%.*]] = and i1 [[VAL0_I7]], [[VAL1_I7]]
465 ; CHECK-NEXT: [[VAL1_I8:%.*]] = extractelement <32 x i1> [[VAL1]], i64 8
466 ; CHECK-NEXT: [[AND_I8:%.*]] = and i1 [[VAL0_I8]], [[VAL1_I8]]
467 ; CHECK-NEXT: [[VAL1_I9:%.*]] = extractelement <32 x i1> [[VAL1]], i64 9
468 ; CHECK-NEXT: [[AND_I9:%.*]] = and i1 [[VAL0_I9]], [[VAL1_I9]]
469 ; CHECK-NEXT: [[VAL1_I10:%.*]] = extractelement <32 x i1> [[VAL1]], i64 10
470 ; CHECK-NEXT: [[AND_I10:%.*]] = and i1 [[VAL0_I10]], [[VAL1_I10]]
471 ; CHECK-NEXT: [[VAL1_I11:%.*]] = extractelement <32 x i1> [[VAL1]], i64 11
472 ; CHECK-NEXT: [[AND_I11:%.*]] = and i1 [[VAL0_I11]], [[VAL1_I11]]
473 ; CHECK-NEXT: [[VAL1_I12:%.*]] = extractelement <32 x i1> [[VAL1]], i64 12
474 ; CHECK-NEXT: [[AND_I12:%.*]] = and i1 [[VAL0_I12]], [[VAL1_I12]]
475 ; CHECK-NEXT: [[VAL1_I13:%.*]] = extractelement <32 x i1> [[VAL1]], i64 13
476 ; CHECK-NEXT: [[AND_I13:%.*]] = and i1 [[VAL0_I13]], [[VAL1_I13]]
477 ; CHECK-NEXT: [[VAL1_I14:%.*]] = extractelement <32 x i1> [[VAL1]], i64 14
478 ; CHECK-NEXT: [[AND_I14:%.*]] = and i1 [[VAL0_I14]], [[VAL1_I14]]
479 ; CHECK-NEXT: [[VAL1_I15:%.*]] = extractelement <32 x i1> [[VAL1]], i64 15
480 ; CHECK-NEXT: [[AND_I15:%.*]] = and i1 [[VAL0_I15]], [[VAL1_I15]]
481 ; CHECK-NEXT: [[VAL1_I16:%.*]] = extractelement <32 x i1> [[VAL1]], i64 16
482 ; CHECK-NEXT: [[AND_I16:%.*]] = and i1 [[VAL0_I16]], [[VAL1_I16]]
483 ; CHECK-NEXT: [[VAL1_I17:%.*]] = extractelement <32 x i1> [[VAL1]], i64 17
484 ; CHECK-NEXT: [[AND_I17:%.*]] = and i1 [[VAL0_I17]], [[VAL1_I17]]
485 ; CHECK-NEXT: [[VAL1_I18:%.*]] = extractelement <32 x i1> [[VAL1]], i64 18
486 ; CHECK-NEXT: [[AND_I18:%.*]] = and i1 [[VAL0_I18]], [[VAL1_I18]]
487 ; CHECK-NEXT: [[VAL1_I19:%.*]] = extractelement <32 x i1> [[VAL1]], i64 19
488 ; CHECK-NEXT: [[AND_I19:%.*]] = and i1 [[VAL0_I19]], [[VAL1_I19]]
489 ; CHECK-NEXT: [[VAL1_I20:%.*]] = extractelement <32 x i1> [[VAL1]], i64 20
490 ; CHECK-NEXT: [[AND_I20:%.*]] = and i1 [[VAL0_I20]], [[VAL1_I20]]
491 ; CHECK-NEXT: [[VAL1_I21:%.*]] = extractelement <32 x i1> [[VAL1]], i64 21
492 ; CHECK-NEXT: [[AND_I21:%.*]] = and i1 [[VAL0_I21]], [[VAL1_I21]]
493 ; CHECK-NEXT: [[VAL1_I22:%.*]] = extractelement <32 x i1> [[VAL1]], i64 22
494 ; CHECK-NEXT: [[AND_I22:%.*]] = and i1 [[VAL0_I22]], [[VAL1_I22]]
495 ; CHECK-NEXT: [[VAL1_I23:%.*]] = extractelement <32 x i1> [[VAL1]], i64 23
496 ; CHECK-NEXT: [[AND_I23:%.*]] = and i1 [[VAL0_I23]], [[VAL1_I23]]
497 ; CHECK-NEXT: [[VAL1_I24:%.*]] = extractelement <32 x i1> [[VAL1]], i64 24
498 ; CHECK-NEXT: [[AND_I24:%.*]] = and i1 [[VAL0_I24]], [[VAL1_I24]]
499 ; CHECK-NEXT: [[VAL1_I25:%.*]] = extractelement <32 x i1> [[VAL1]], i64 25
500 ; CHECK-NEXT: [[AND_I25:%.*]] = and i1 [[VAL0_I25]], [[VAL1_I25]]
501 ; CHECK-NEXT: [[VAL1_I26:%.*]] = extractelement <32 x i1> [[VAL1]], i64 26
502 ; CHECK-NEXT: [[AND_I26:%.*]] = and i1 [[VAL0_I26]], [[VAL1_I26]]
503 ; CHECK-NEXT: [[VAL1_I27:%.*]] = extractelement <32 x i1> [[VAL1]], i64 27
504 ; CHECK-NEXT: [[AND_I27:%.*]] = and i1 [[VAL0_I27]], [[VAL1_I27]]
505 ; CHECK-NEXT: [[VAL1_I28:%.*]] = extractelement <32 x i1> [[VAL1]], i64 28
506 ; CHECK-NEXT: [[AND_I28:%.*]] = and i1 [[VAL0_I28]], [[VAL1_I28]]
507 ; CHECK-NEXT: [[VAL1_I29:%.*]] = extractelement <32 x i1> [[VAL1]], i64 29
508 ; CHECK-NEXT: [[AND_I29:%.*]] = and i1 [[VAL0_I29]], [[VAL1_I29]]
509 ; CHECK-NEXT: [[VAL1_I30:%.*]] = extractelement <32 x i1> [[VAL1]], i64 30
510 ; CHECK-NEXT: [[AND_I30:%.*]] = and i1 [[VAL0_I30]], [[VAL1_I30]]
511 ; CHECK-NEXT: [[VAL1_I31:%.*]] = extractelement <32 x i1> [[VAL1]], i64 31
512 ; CHECK-NEXT: [[AND_I31:%.*]] = and i1 [[VAL0_I31]], [[VAL1_I31]]
513 ; CHECK-NEXT: [[AND_UPTO0:%.*]] = insertelement <32 x i1> poison, i1 [[AND_I0]], i64 0
514 ; CHECK-NEXT: [[AND_UPTO1:%.*]] = insertelement <32 x i1> [[AND_UPTO0]], i1 [[AND_I1]], i64 1
515 ; CHECK-NEXT: [[AND_UPTO2:%.*]] = insertelement <32 x i1> [[AND_UPTO1]], i1 [[AND_I2]], i64 2
516 ; CHECK-NEXT: [[AND_UPTO3:%.*]] = insertelement <32 x i1> [[AND_UPTO2]], i1 [[AND_I3]], i64 3
517 ; CHECK-NEXT: [[AND_UPTO4:%.*]] = insertelement <32 x i1> [[AND_UPTO3]], i1 [[AND_I4]], i64 4
518 ; CHECK-NEXT: [[AND_UPTO5:%.*]] = insertelement <32 x i1> [[AND_UPTO4]], i1 [[AND_I5]], i64 5
519 ; CHECK-NEXT: [[AND_UPTO6:%.*]] = insertelement <32 x i1> [[AND_UPTO5]], i1 [[AND_I6]], i64 6
520 ; CHECK-NEXT: [[AND_UPTO7:%.*]] = insertelement <32 x i1> [[AND_UPTO6]], i1 [[AND_I7]], i64 7
521 ; CHECK-NEXT: [[AND_UPTO8:%.*]] = insertelement <32 x i1> [[AND_UPTO7]], i1 [[AND_I8]], i64 8
522 ; CHECK-NEXT: [[AND_UPTO9:%.*]] = insertelement <32 x i1> [[AND_UPTO8]], i1 [[AND_I9]], i64 9
523 ; CHECK-NEXT: [[AND_UPTO10:%.*]] = insertelement <32 x i1> [[AND_UPTO9]], i1 [[AND_I10]], i64 10
524 ; CHECK-NEXT: [[AND_UPTO11:%.*]] = insertelement <32 x i1> [[AND_UPTO10]], i1 [[AND_I11]], i64 11
525 ; CHECK-NEXT: [[AND_UPTO12:%.*]] = insertelement <32 x i1> [[AND_UPTO11]], i1 [[AND_I12]], i64 12
526 ; CHECK-NEXT: [[AND_UPTO13:%.*]] = insertelement <32 x i1> [[AND_UPTO12]], i1 [[AND_I13]], i64 13
527 ; CHECK-NEXT: [[AND_UPTO14:%.*]] = insertelement <32 x i1> [[AND_UPTO13]], i1 [[AND_I14]], i64 14
528 ; CHECK-NEXT: [[AND_UPTO15:%.*]] = insertelement <32 x i1> [[AND_UPTO14]], i1 [[AND_I15]], i64 15
529 ; CHECK-NEXT: [[AND_UPTO16:%.*]] = insertelement <32 x i1> [[AND_UPTO15]], i1 [[AND_I16]], i64 16
530 ; CHECK-NEXT: [[AND_UPTO17:%.*]] = insertelement <32 x i1> [[AND_UPTO16]], i1 [[AND_I17]], i64 17
531 ; CHECK-NEXT: [[AND_UPTO18:%.*]] = insertelement <32 x i1> [[AND_UPTO17]], i1 [[AND_I18]], i64 18
532 ; CHECK-NEXT: [[AND_UPTO19:%.*]] = insertelement <32 x i1> [[AND_UPTO18]], i1 [[AND_I19]], i64 19
533 ; CHECK-NEXT: [[AND_UPTO20:%.*]] = insertelement <32 x i1> [[AND_UPTO19]], i1 [[AND_I20]], i64 20
534 ; CHECK-NEXT: [[AND_UPTO21:%.*]] = insertelement <32 x i1> [[AND_UPTO20]], i1 [[AND_I21]], i64 21
535 ; CHECK-NEXT: [[AND_UPTO22:%.*]] = insertelement <32 x i1> [[AND_UPTO21]], i1 [[AND_I22]], i64 22
536 ; CHECK-NEXT: [[AND_UPTO23:%.*]] = insertelement <32 x i1> [[AND_UPTO22]], i1 [[AND_I23]], i64 23
537 ; CHECK-NEXT: [[AND_UPTO24:%.*]] = insertelement <32 x i1> [[AND_UPTO23]], i1 [[AND_I24]], i64 24
538 ; CHECK-NEXT: [[AND_UPTO25:%.*]] = insertelement <32 x i1> [[AND_UPTO24]], i1 [[AND_I25]], i64 25
539 ; CHECK-NEXT: [[AND_UPTO26:%.*]] = insertelement <32 x i1> [[AND_UPTO25]], i1 [[AND_I26]], i64 26
540 ; CHECK-NEXT: [[AND_UPTO27:%.*]] = insertelement <32 x i1> [[AND_UPTO26]], i1 [[AND_I27]], i64 27
541 ; CHECK-NEXT: [[AND_UPTO28:%.*]] = insertelement <32 x i1> [[AND_UPTO27]], i1 [[AND_I28]], i64 28
542 ; CHECK-NEXT: [[AND_UPTO29:%.*]] = insertelement <32 x i1> [[AND_UPTO28]], i1 [[AND_I29]], i64 29
543 ; CHECK-NEXT: [[AND_UPTO30:%.*]] = insertelement <32 x i1> [[AND_UPTO29]], i1 [[AND_I30]], i64 30
544 ; CHECK-NEXT: [[AND:%.*]] = insertelement <32 x i1> [[AND_UPTO30]], i1 [[AND_I31]], i64 31
545 ; CHECK-NEXT: store <32 x i1> [[AND]], ptr [[DEST:%.*]], align 4
546 ; CHECK-NEXT: ret void
548 %src1 = getelementptr <32 x i1>, ptr %src0, i32 1
549 %val0 = load <32 x i1> , ptr %src0
550 %val1 = load <32 x i1> , ptr %src1
551 %and = and <32 x i1> %val0, %val1
552 store <32 x i1> %and, ptr %dest
556 ; Test vector GEPs with more than one index.
; The GEP under test has a <4 x ptr> base and the constant index vector
; <0, 1, 2, 3>. The CHECK lines expect one scalar GEP per lane: lane N uses
; the extracted pointer PTR_I<N>, the constant N as its first index and the
; extracted I_I<N> as its second index. The <4 x ptr> store is expected to
; become four scalar ptr stores to DEST, DEST_I1, DEST_I2 and DEST_I3.
557 define void @f13(ptr %dest, <4 x ptr> %ptr, <4 x i32> %i,
559 ; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST:%.*]], i32 1
560 ; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr ptr, ptr [[DEST]], i32 2
561 ; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr ptr, ptr [[DEST]], i32 3
562 ; CHECK-NEXT: [[PTR_I0:%.*]] = extractelement <4 x ptr> [[PTR:%.*]], i64 0
563 ; CHECK-NEXT: [[I_I0:%.*]] = extractelement <4 x i32> [[I:%.*]], i64 0
564 ; CHECK-NEXT: [[VAL_I0:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I0]], i32 0, i32 [[I_I0]]
565 ; CHECK-NEXT: [[PTR_I1:%.*]] = extractelement <4 x ptr> [[PTR]], i64 1
566 ; CHECK-NEXT: [[I_I1:%.*]] = extractelement <4 x i32> [[I]], i64 1
567 ; CHECK-NEXT: [[VAL_I1:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I1]], i32 1, i32 [[I_I1]]
568 ; CHECK-NEXT: [[PTR_I2:%.*]] = extractelement <4 x ptr> [[PTR]], i64 2
569 ; CHECK-NEXT: [[I_I2:%.*]] = extractelement <4 x i32> [[I]], i64 2
570 ; CHECK-NEXT: [[VAL_I2:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I2]], i32 2, i32 [[I_I2]]
571 ; CHECK-NEXT: [[PTR_I3:%.*]] = extractelement <4 x ptr> [[PTR]], i64 3
572 ; CHECK-NEXT: [[I_I3:%.*]] = extractelement <4 x i32> [[I]], i64 3
573 ; CHECK-NEXT: [[VAL_I3:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I3]], i32 3, i32 [[I_I3]]
574 ; CHECK-NEXT: store ptr [[VAL_I0]], ptr [[DEST]], align 32
575 ; CHECK-NEXT: store ptr [[VAL_I1]], ptr [[DEST_I1]], align 8
576 ; CHECK-NEXT: store ptr [[VAL_I2]], ptr [[DEST_I2]], align 16
577 ; CHECK-NEXT: store ptr [[VAL_I3]], ptr [[DEST_I3]], align 8
578 ; CHECK-NEXT: ret void
581 %val = getelementptr inbounds [4 x float], <4 x ptr> %ptr,
582 <4 x i32> <i32 0, i32 1, i32 2, i32 3>,
584 store <4 x ptr> %val, ptr %dest
588 ; Test combinations of vector and non-vector PHIs.
; %this_acc is a <4 x float> PHI and %this_count is an i32 PHI in the same
; block. The CHECK lines expect four float PHIs (THIS_ACC_I0..I3) followed by
; the i32 PHI. The call to @ext still takes and returns a whole vector, so the
; expected output rebuilds its operand with an insertelement chain and splits
; its result with extractelement. The returned value is likewise rebuilt from
; the four scalar fadd results.
589 define <4 x float> @f14(<4 x float> %acc, i32 %count) {
592 ; CHECK-NEXT: [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC:%.*]], i64 0
593 ; CHECK-NEXT: [[ACC_I1:%.*]] = extractelement <4 x float> [[ACC]], i64 1
594 ; CHECK-NEXT: [[ACC_I2:%.*]] = extractelement <4 x float> [[ACC]], i64 2
595 ; CHECK-NEXT: [[ACC_I3:%.*]] = extractelement <4 x float> [[ACC]], i64 3
596 ; CHECK-NEXT: br label [[LOOP:%.*]]
598 ; CHECK-NEXT: [[THIS_ACC_I0:%.*]] = phi float [ [[ACC_I0]], [[ENTRY:%.*]] ], [ [[NEXT_ACC_I0:%.*]], [[LOOP]] ]
599 ; CHECK-NEXT: [[THIS_ACC_I1:%.*]] = phi float [ [[ACC_I1]], [[ENTRY]] ], [ [[NEXT_ACC_I1:%.*]], [[LOOP]] ]
600 ; CHECK-NEXT: [[THIS_ACC_I2:%.*]] = phi float [ [[ACC_I2]], [[ENTRY]] ], [ [[NEXT_ACC_I2:%.*]], [[LOOP]] ]
601 ; CHECK-NEXT: [[THIS_ACC_I3:%.*]] = phi float [ [[ACC_I3]], [[ENTRY]] ], [ [[NEXT_ACC_I3:%.*]], [[LOOP]] ]
602 ; CHECK-NEXT: [[THIS_COUNT:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[NEXT_COUNT:%.*]], [[LOOP]] ]
603 ; CHECK-NEXT: [[THIS_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[THIS_ACC_I0]], i64 0
604 ; CHECK-NEXT: [[THIS_ACC_UPTO1:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO0]], float [[THIS_ACC_I1]], i64 1
605 ; CHECK-NEXT: [[THIS_ACC_UPTO2:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO1]], float [[THIS_ACC_I2]], i64 2
606 ; CHECK-NEXT: [[THIS_ACC:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO2]], float [[THIS_ACC_I3]], i64 3
607 ; CHECK-NEXT: [[FOO:%.*]] = call <4 x float> @ext(<4 x float> [[THIS_ACC]])
608 ; CHECK-NEXT: [[FOO_I0:%.*]] = extractelement <4 x float> [[FOO]], i64 0
609 ; CHECK-NEXT: [[NEXT_ACC_I0]] = fadd float [[THIS_ACC_I0]], [[FOO_I0]]
610 ; CHECK-NEXT: [[FOO_I1:%.*]] = extractelement <4 x float> [[FOO]], i64 1
611 ; CHECK-NEXT: [[NEXT_ACC_I1]] = fadd float [[THIS_ACC_I1]], [[FOO_I1]]
612 ; CHECK-NEXT: [[FOO_I2:%.*]] = extractelement <4 x float> [[FOO]], i64 2
613 ; CHECK-NEXT: [[NEXT_ACC_I2]] = fadd float [[THIS_ACC_I2]], [[FOO_I2]]
614 ; CHECK-NEXT: [[FOO_I3:%.*]] = extractelement <4 x float> [[FOO]], i64 3
615 ; CHECK-NEXT: [[NEXT_ACC_I3]] = fadd float [[THIS_ACC_I3]], [[FOO_I3]]
616 ; CHECK-NEXT: [[NEXT_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEXT_ACC_I0]], i64 0
617 ; CHECK-NEXT: [[NEXT_ACC_UPTO1:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO0]], float [[NEXT_ACC_I1]], i64 1
618 ; CHECK-NEXT: [[NEXT_ACC_UPTO2:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO1]], float [[NEXT_ACC_I2]], i64 2
619 ; CHECK-NEXT: [[NEXT_ACC:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO2]], float [[NEXT_ACC_I3]], i64 3
620 ; CHECK-NEXT: [[NEXT_COUNT]] = sub i32 [[THIS_COUNT]], 1
621 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[NEXT_COUNT]], 0
622 ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
624 ; CHECK-NEXT: ret <4 x float> [[NEXT_ACC]]
630 %this_acc = phi <4 x float> [ %acc, %entry ], [ %next_acc, %loop ]
631 %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
632 %foo = call <4 x float> @ext(<4 x float> %this_acc)
633 %next_acc = fadd <4 x float> %this_acc, %foo
634 %next_count = sub i32 %this_count, 1
635 %cmp = icmp eq i32 %next_count, 0
636 br i1 %cmp, label %loop, label %exit
639 ret <4 x float> %next_acc
642 ; Test unary operator scalarization.
; The <4 x float> fneg is expected to become four scalar fneg instructions
; (NEG_I0..I3) fed by the per-lane loads. Its result is rebuilt into a vector
; only because @ext takes a whole <4 x float>; the fcmp, select and store
; that follow the call are expected to stay per-lane.
; NOTE(review): %acc is never used by the loop body and no CHECK line expects
; a PHI for it; presumably the dce pass in the RUN line removes it -- confirm.
643 define void @f15(<4 x float> %init, ptr %base, i32 %count) {
646 ; CHECK-NEXT: br label [[LOOP:%.*]]
648 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
649 ; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1
650 ; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE:%.*]], i32 [[I]]
651 ; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[PTR]], align 16
652 ; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
653 ; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[PTR_I1]], align 4
654 ; CHECK-NEXT: [[PTR_I2:%.*]] = getelementptr float, ptr [[PTR]], i32 2
655 ; CHECK-NEXT: [[VAL_I2:%.*]] = load float, ptr [[PTR_I2]], align 8
656 ; CHECK-NEXT: [[PTR_I3:%.*]] = getelementptr float, ptr [[PTR]], i32 3
657 ; CHECK-NEXT: [[VAL_I3:%.*]] = load float, ptr [[PTR_I3]], align 4
658 ; CHECK-NEXT: [[NEG_I0:%.*]] = fneg float [[VAL_I0]]
659 ; CHECK-NEXT: [[NEG_I1:%.*]] = fneg float [[VAL_I1]]
660 ; CHECK-NEXT: [[NEG_I2:%.*]] = fneg float [[VAL_I2]]
661 ; CHECK-NEXT: [[NEG_I3:%.*]] = fneg float [[VAL_I3]]
662 ; CHECK-NEXT: [[NEG_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEG_I0]], i64 0
663 ; CHECK-NEXT: [[NEG_UPTO1:%.*]] = insertelement <4 x float> [[NEG_UPTO0]], float [[NEG_I1]], i64 1
664 ; CHECK-NEXT: [[NEG_UPTO2:%.*]] = insertelement <4 x float> [[NEG_UPTO1]], float [[NEG_I2]], i64 2
665 ; CHECK-NEXT: [[NEG:%.*]] = insertelement <4 x float> [[NEG_UPTO2]], float [[NEG_I3]], i64 3
666 ; CHECK-NEXT: [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[NEG]])
667 ; CHECK-NEXT: [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i64 0
668 ; CHECK-NEXT: [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00
669 ; CHECK-NEXT: [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i64 1
670 ; CHECK-NEXT: [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00
671 ; CHECK-NEXT: [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i64 2
672 ; CHECK-NEXT: [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00
673 ; CHECK-NEXT: [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i64 3
674 ; CHECK-NEXT: [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00
675 ; CHECK-NEXT: [[SEL_I0:%.*]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00
676 ; CHECK-NEXT: [[SEL_I1:%.*]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00
677 ; CHECK-NEXT: [[SEL_I2:%.*]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00
678 ; CHECK-NEXT: [[SEL_I3:%.*]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00
679 ; CHECK-NEXT: store float [[SEL_I0]], ptr [[PTR]], align 16
680 ; CHECK-NEXT: store float [[SEL_I1]], ptr [[PTR_I1]], align 4
681 ; CHECK-NEXT: store float [[SEL_I2]], ptr [[PTR_I2]], align 8
682 ; CHECK-NEXT: store float [[SEL_I3]], ptr [[PTR_I3]], align 4
683 ; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
684 ; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
686 ; CHECK-NEXT: ret void
692 %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
693 %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
694 %nexti = sub i32 %i, 1
696 %ptr = getelementptr <4 x float>, ptr %base, i32 %i
697 %val = load <4 x float> , ptr %ptr
698 %neg = fneg <4 x float> %val
699 %call = call <4 x float> @ext(<4 x float> %neg)
700 %cmp = fcmp ogt <4 x float> %call,
701 <float 1.0, float 2.0, float 3.0, float 4.0>
702 %sel = select <4 x i1> %cmp, <4 x float> %call,
703 <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
704 store <4 x float> %sel, ptr %ptr
706 %test = icmp eq i32 %nexti, 0
707 br i1 %test, label %loop, label %exit
713 ; Check that IR flags are preserved.
; f16: the vector add carries `nuw nsw`; both scalar adds in the expected
; output (RES_I0, RES_I1) must carry the same two wrap flags.
714 define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) {
716 ; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0
717 ; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0
718 ; CHECK-NEXT: [[RES_I0:%.*]] = add nuw nsw i32 [[I_I0]], [[J_I0]]
719 ; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1
720 ; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1
721 ; CHECK-NEXT: [[RES_I1:%.*]] = add nuw nsw i32 [[I_I1]], [[J_I1]]
722 ; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i64 0
723 ; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i64 1
724 ; CHECK-NEXT: ret <2 x i32> [[RES]]
726 %res = add nuw nsw <2 x i32> %i, %j
; f17: the vector sdiv carries the `exact` flag; both scalar sdivs in the
; expected output (RES_I0, RES_I1) must keep it.
729 define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) {
731 ; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0
732 ; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0
733 ; CHECK-NEXT: [[RES_I0:%.*]] = sdiv exact i32 [[I_I0]], [[J_I0]]
734 ; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1
735 ; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1
736 ; CHECK-NEXT: [[RES_I1:%.*]] = sdiv exact i32 [[I_I1]], [[J_I1]]
737 ; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i64 0
738 ; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i64 1
739 ; CHECK-NEXT: ret <2 x i32> [[RES]]
741 %res = sdiv exact <2 x i32> %i, %j
; f18: the vector fadd carries the `fast` fast-math flag; both scalar fadds
; in the expected output (RES_I0, RES_I1) must keep it.
744 define <2 x float> @f18(<2 x float> %x, <2 x float> %y) {
746 ; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
747 ; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0
748 ; CHECK-NEXT: [[RES_I0:%.*]] = fadd fast float [[X_I0]], [[Y_I0]]
749 ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
750 ; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1
751 ; CHECK-NEXT: [[RES_I1:%.*]] = fadd fast float [[X_I1]], [[Y_I1]]
752 ; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0
753 ; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1
754 ; CHECK-NEXT: ret <2 x float> [[RES]]
756 %res = fadd fast <2 x float> %x, %y
; f19: the vector fneg (a unary operator) carries `fast`; both scalar fnegs
; in the expected output (RES_I0, RES_I1) must keep it.
759 define <2 x float> @f19(<2 x float> %x) {
761 ; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
762 ; CHECK-NEXT: [[RES_I0:%.*]] = fneg fast float [[X_I0]]
763 ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
764 ; CHECK-NEXT: [[RES_I1:%.*]] = fneg fast float [[X_I1]]
765 ; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0
766 ; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1
767 ; CHECK-NEXT: ret <2 x float> [[RES]]
769 %res = fneg fast <2 x float> %x
; f20: the vector fcmp carries `fast`; both scalar `fcmp ogt` instructions in
; the expected output (RES_I0, RES_I1) must keep it. The result is rebuilt as
; a <2 x i1> for the return.
772 define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) {
774 ; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
775 ; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0
776 ; CHECK-NEXT: [[RES_I0:%.*]] = fcmp fast ogt float [[X_I0]], [[Y_I0]]
777 ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
778 ; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1
779 ; CHECK-NEXT: [[RES_I1:%.*]] = fcmp fast ogt float [[X_I1]], [[Y_I1]]
780 ; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[RES_I0]], i64 0
781 ; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i1> [[RES_UPTO0]], i1 [[RES_I1]], i64 1
782 ; CHECK-NEXT: ret <2 x i1> [[RES]]
784 %res = fcmp fast ogt <2 x float> %x, %y
; f21: a `fast` call to the vector intrinsic @llvm.sqrt.v2f32 is expected to
; become two `call fast` instructions to the scalar @llvm.sqrt.f32, one per
; lane, so the fast-math flag survives on intrinsic calls as well.
787 declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
788 define <2 x float> @f21(<2 x float> %x) {
790 ; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
791 ; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I0]])
792 ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
793 ; CHECK-NEXT: [[RES_I1:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I1]])
794 ; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0
795 ; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1
796 ; CHECK-NEXT: ret <2 x float> [[RES]]
798 %res = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
; f22: same as the sqrt case but for a three-operand intrinsic. The `fast`
; call to @llvm.fma.v2f32 is expected to become two `call fast` instructions
; to @llvm.fma.f32, each taking the matching lane of %x, %y and %z.
801 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
802 define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
804 ; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
805 ; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0
806 ; CHECK-NEXT: [[Z_I0:%.*]] = extractelement <2 x float> [[Z:%.*]], i64 0
807 ; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.fma.f32(float [[X_I0]], float [[Y_I0]], float [[Z_I0]])
808 ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
809 ; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1
810 ; CHECK-NEXT: [[Z_I1:%.*]] = extractelement <2 x float> [[Z]], i64 1
811 ; CHECK-NEXT: [[RES_I1:%.*]] = call fast float @llvm.fma.f32(float [[X_I1]], float [[Y_I1]], float [[Z_I1]])
812 ; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0
813 ; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1
814 ; CHECK-NEXT: ret <2 x float> [[RES]]
816 %res = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z)
820 ; See https://reviews.llvm.org/D83101#2133062
; Builds a <2 x i32> from lane 0 of %srcvec and the scalar %v1 through an
; insertelement chain that starts from undef. The CHECK lines expect an
; equivalent two-step insertelement chain that starts from poison.
; NOTE(review): the "_crash" suffix suggests this is a regression test for a
; crash discussed in the linked review -- confirm there.
821 define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) {
822 ; CHECK-LABEL: @f23_crash(
823 ; CHECK-NEXT: [[SRCVEC_I0:%.*]] = extractelement <2 x i32> [[SRCVEC:%.*]], i64 0
824 ; CHECK-NEXT: [[T1_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[SRCVEC_I0]], i64 0
825 ; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1:%.*]], i64 1
826 ; CHECK-NEXT: ret <2 x i32> [[T1]]
828 %v0 = extractelement <2 x i32> %srcvec, i32 0
829 %t0 = insertelement <2 x i32> undef, i32 %v0, i32 0
830 %t1 = insertelement <2 x i32> %t0, i32 %v1, i32 1
; f24: a vector `freeze` is expected to be split into one scalar freeze per
; lane (FRZ_I0, FRZ_I1), then rebuilt into a <2 x i32> for the return.
834 define <2 x i32> @f24(<2 x i32> %src) {
836 ; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <2 x i32> [[SRC:%.*]], i64 0
837 ; CHECK-NEXT: [[FRZ_I0:%.*]] = freeze i32 [[SRC_I0]]
838 ; CHECK-NEXT: [[SRC_I1:%.*]] = extractelement <2 x i32> [[SRC]], i64 1
839 ; CHECK-NEXT: [[FRZ_I1:%.*]] = freeze i32 [[SRC_I1]]
840 ; CHECK-NEXT: [[FRZ_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[FRZ_I0]], i64 0
841 ; CHECK-NEXT: [[FRZ:%.*]] = insertelement <2 x i32> [[FRZ_UPTO0]], i32 [[FRZ_I1]], i64 1
842 ; CHECK-NEXT: ret <2 x i32> [[FRZ]]
844 %frz = freeze <2 x i32> %src
; f25: a vector `freeze` sits between a vector fadd and a vector fmul. The
; CHECK lines expect the whole fadd -> freeze -> fmul chain to stay scalar
; per lane, with a single insertelement chain (MUL) only at the return.
848 define <2 x float> @f25(<2 x float> %src) {
850 ; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <2 x float> [[SRC:%.*]], i64 0
851 ; CHECK-NEXT: [[ADD_I0:%.*]] = fadd float [[SRC_I0]], [[SRC_I0]]
852 ; CHECK-NEXT: [[SRC_I1:%.*]] = extractelement <2 x float> [[SRC]], i64 1
853 ; CHECK-NEXT: [[ADD_I1:%.*]] = fadd float [[SRC_I1]], [[SRC_I1]]
854 ; CHECK-NEXT: [[FRZ_I0:%.*]] = freeze float [[ADD_I0]]
855 ; CHECK-NEXT: [[FRZ_I1:%.*]] = freeze float [[ADD_I1]]
856 ; CHECK-NEXT: [[MUL_I0:%.*]] = fmul float [[FRZ_I0]], [[FRZ_I0]]
857 ; CHECK-NEXT: [[MUL_I1:%.*]] = fmul float [[FRZ_I1]], [[FRZ_I1]]
858 ; CHECK-NEXT: [[MUL_UPTO0:%.*]] = insertelement <2 x float> poison, float [[MUL_I0]], i64 0
859 ; CHECK-NEXT: [[MUL:%.*]] = insertelement <2 x float> [[MUL_UPTO0]], float [[MUL_I1]], i64 1
860 ; CHECK-NEXT: ret <2 x float> [[MUL]]
862 %add = fadd <2 x float> %src, %src
863 %frz = freeze <2 x float> %add
864 %mul = fmul <2 x float> %frz, %frz
; The vector trunc carries `nuw nsw`; both scalar truncs in the expected
; output (TRUNC_I0, TRUNC_I1) must carry the same two flags.
868 define <2 x i8> @test_copy_trunc_flags(<2 x i32> %src) {
869 ; CHECK-LABEL: @test_copy_trunc_flags(
870 ; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <2 x i32> [[SRC:%.*]], i64 0
871 ; CHECK-NEXT: [[TRUNC_I0:%.*]] = trunc nuw nsw i32 [[SRC_I0]] to i8
872 ; CHECK-NEXT: [[SRC_I1:%.*]] = extractelement <2 x i32> [[SRC]], i64 1
873 ; CHECK-NEXT: [[TRUNC_I1:%.*]] = trunc nuw nsw i32 [[SRC_I1]] to i8
874 ; CHECK-NEXT: [[TRUNC_UPTO0:%.*]] = insertelement <2 x i8> poison, i8 [[TRUNC_I0]], i64 0
875 ; CHECK-NEXT: [[TRUNC:%.*]] = insertelement <2 x i8> [[TRUNC_UPTO0]], i8 [[TRUNC_I1]], i64 1
876 ; CHECK-NEXT: ret <2 x i8> [[TRUNC]]
878 %trunc = trunc nuw nsw <2 x i32> %src to <2 x i8>
883 !1 = !{ !"set1", !0 }
884 !2 = !{ !"set2", !0 }
885 !3 = !{ !3, !{!"llvm.loop.parallel_accesses", !13} }
887 !5 = !{ i64 0, i64 8, null }