1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+sse2 | FileCheck %s --check-prefixes=SSE
3 ; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx | FileCheck %s --check-prefixes=AVX
4 ; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
5 ; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F
6 ; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512VL
8 define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
9 ; SSE-LABEL: @gather_load(
10 ; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
11 ; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
12 ; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
13 ; SSE-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
14 ; SSE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
15 ; SSE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
16 ; SSE-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
17 ; SSE-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
18 ; SSE-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
19 ; SSE-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
20 ; SSE-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
21 ; SSE-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
22 ; SSE-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
25 ; AVX-LABEL: @gather_load(
26 ; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
27 ; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
28 ; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
29 ; AVX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
30 ; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
31 ; AVX-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
32 ; AVX-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
33 ; AVX-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
34 ; AVX-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
35 ; AVX-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
36 ; AVX-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
37 ; AVX-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
38 ; AVX-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
41 ; AVX2-LABEL: @gather_load(
42 ; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
43 ; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
44 ; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
45 ; AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
46 ; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
47 ; AVX2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
48 ; AVX2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
49 ; AVX2-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
50 ; AVX2-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
51 ; AVX2-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
52 ; AVX2-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
53 ; AVX2-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
54 ; AVX2-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
57 ; AVX512F-LABEL: @gather_load(
58 ; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
59 ; AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
60 ; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
61 ; AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
62 ; AVX512F-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
63 ; AVX512F-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
64 ; AVX512F-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
65 ; AVX512F-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
66 ; AVX512F-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
67 ; AVX512F-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
68 ; AVX512F-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
69 ; AVX512F-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
70 ; AVX512F-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
71 ; AVX512F-NEXT: ret void
73 ; AVX512VL-LABEL: @gather_load(
74 ; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
75 ; AVX512VL-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
76 ; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
77 ; AVX512VL-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
78 ; AVX512VL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
79 ; AVX512VL-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
80 ; AVX512VL-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
81 ; AVX512VL-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
82 ; AVX512VL-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
83 ; AVX512VL-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
84 ; AVX512VL-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
85 ; AVX512VL-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
86 ; AVX512VL-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
87 ; AVX512VL-NEXT: ret void
89 %3 = getelementptr inbounds i32, ptr %1, i64 1
90 %4 = load i32, ptr %1, align 4, !tbaa !2
91 %5 = getelementptr inbounds i32, ptr %0, i64 1
92 %6 = getelementptr inbounds i32, ptr %1, i64 11
93 %7 = load i32, ptr %6, align 4, !tbaa !2
94 %8 = getelementptr inbounds i32, ptr %0, i64 2
95 %9 = getelementptr inbounds i32, ptr %1, i64 4
96 %10 = load i32, ptr %9, align 4, !tbaa !2
97 %11 = getelementptr inbounds i32, ptr %0, i64 3
98 %12 = load i32, ptr %3, align 4, !tbaa !2
99 %13 = insertelement <4 x i32> undef, i32 %4, i32 0
100 %14 = insertelement <4 x i32> %13, i32 %7, i32 1
101 %15 = insertelement <4 x i32> %14, i32 %10, i32 2
102 %16 = insertelement <4 x i32> %15, i32 %12, i32 3
103 %17 = add nsw <4 x i32> %16, <i32 1, i32 2, i32 3, i32 4>
104 store <4 x i32> %17, ptr %0, align 4, !tbaa !2
108 define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
109 ; SSE-LABEL: @gather_load_2(
110 ; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
111 ; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
112 ; SSE-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP4]], 1
113 ; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP0:%.*]], i64 4
114 ; SSE-NEXT: store i32 [[TMP5]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
115 ; SSE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
116 ; SSE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
117 ; SSE-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP8]], 2
118 ; SSE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
119 ; SSE-NEXT: store i32 [[TMP9]], ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
120 ; SSE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
121 ; SSE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
122 ; SSE-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], 3
123 ; SSE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12
124 ; SSE-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
125 ; SSE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
126 ; SSE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
127 ; SSE-NEXT: [[TMP17:%.*]] = add nsw i32 [[TMP16]], 4
128 ; SSE-NEXT: store i32 [[TMP17]], ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
131 ; AVX-LABEL: @gather_load_2(
132 ; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
133 ; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
134 ; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
135 ; AVX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
136 ; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
137 ; AVX-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
138 ; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
139 ; AVX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
140 ; AVX-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
141 ; AVX-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1
142 ; AVX-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i64 2
143 ; AVX-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3
144 ; AVX-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4>
145 ; AVX-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
148 ; AVX2-LABEL: @gather_load_2(
149 ; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
150 ; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
151 ; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
152 ; AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
153 ; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
154 ; AVX2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
155 ; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
156 ; AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
157 ; AVX2-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
158 ; AVX2-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1
159 ; AVX2-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i64 2
160 ; AVX2-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3
161 ; AVX2-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4>
162 ; AVX2-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
163 ; AVX2-NEXT: ret void
165 ; AVX512F-LABEL: @gather_load_2(
166 ; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
167 ; AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
168 ; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
169 ; AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
170 ; AVX512F-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
171 ; AVX512F-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
172 ; AVX512F-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
173 ; AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
174 ; AVX512F-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
175 ; AVX512F-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1
176 ; AVX512F-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i64 2
177 ; AVX512F-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3
178 ; AVX512F-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4>
179 ; AVX512F-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
180 ; AVX512F-NEXT: ret void
182 ; AVX512VL-LABEL: @gather_load_2(
183 ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1:%.*]], i64 0
184 ; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer
185 ; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr i32, <4 x ptr> [[TMP4]], <4 x i64> <i64 1, i64 10, i64 3, i64 5>
186 ; AVX512VL-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP5]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison), !tbaa [[TBAA0]]
187 ; AVX512VL-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], <i32 1, i32 2, i32 3, i32 4>
188 ; AVX512VL-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
189 ; AVX512VL-NEXT: ret void
191 %3 = getelementptr inbounds i32, ptr %1, i64 1
192 %4 = load i32, ptr %3, align 4, !tbaa !2
193 %5 = add nsw i32 %4, 1
194 %6 = getelementptr inbounds i32, ptr %0, i64 1
195 store i32 %5, ptr %0, align 4, !tbaa !2
196 %7 = getelementptr inbounds i32, ptr %1, i64 10
197 %8 = load i32, ptr %7, align 4, !tbaa !2
198 %9 = add nsw i32 %8, 2
199 %10 = getelementptr inbounds i32, ptr %0, i64 2
200 store i32 %9, ptr %6, align 4, !tbaa !2
201 %11 = getelementptr inbounds i32, ptr %1, i64 3
202 %12 = load i32, ptr %11, align 4, !tbaa !2
203 %13 = add nsw i32 %12, 3
204 %14 = getelementptr inbounds i32, ptr %0, i64 3
205 store i32 %13, ptr %10, align 4, !tbaa !2
206 %15 = getelementptr inbounds i32, ptr %1, i64 5
207 %16 = load i32, ptr %15, align 4, !tbaa !2
208 %17 = add nsw i32 %16, 4
209 store i32 %17, ptr %14, align 4, !tbaa !2
214 define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
215 ; SSE-LABEL: @gather_load_3(
216 ; SSE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
217 ; SSE-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 1
218 ; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0:%.*]], i64 4
219 ; SSE-NEXT: store i32 [[TMP4]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
220 ; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
221 ; SSE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
222 ; SSE-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 2
223 ; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
224 ; SSE-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
225 ; SSE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
226 ; SSE-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
227 ; SSE-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 3
228 ; SSE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12
229 ; SSE-NEXT: store i32 [[TMP12]], ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
230 ; SSE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 60
231 ; SSE-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
232 ; SSE-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], 4
233 ; SSE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
234 ; SSE-NEXT: store i32 [[TMP16]], ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
235 ; SSE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 72
236 ; SSE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !tbaa [[TBAA0]]
237 ; SSE-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 1
238 ; SSE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 20
239 ; SSE-NEXT: store i32 [[TMP20]], ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
240 ; SSE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 36
241 ; SSE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !tbaa [[TBAA0]]
242 ; SSE-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], 2
243 ; SSE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 24
244 ; SSE-NEXT: store i32 [[TMP24]], ptr [[TMP21]], align 4, !tbaa [[TBAA0]]
245 ; SSE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 24
246 ; SSE-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !tbaa [[TBAA0]]
247 ; SSE-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 3
248 ; SSE-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 28
249 ; SSE-NEXT: store i32 [[TMP28]], ptr [[TMP25]], align 4, !tbaa [[TBAA0]]
250 ; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 84
251 ; SSE-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4, !tbaa [[TBAA0]]
252 ; SSE-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], 4
253 ; SSE-NEXT: store i32 [[TMP32]], ptr [[TMP29]], align 4, !tbaa [[TBAA0]]
256 ; AVX-LABEL: @gather_load_3(
257 ; AVX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
258 ; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
259 ; AVX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
260 ; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
261 ; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
262 ; AVX-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 60
263 ; AVX-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
264 ; AVX-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 72
265 ; AVX-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
266 ; AVX-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 36
267 ; AVX-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[TBAA0]]
268 ; AVX-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 24
269 ; AVX-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
270 ; AVX-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 84
271 ; AVX-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !tbaa [[TBAA0]]
272 ; AVX-NEXT: [[TMP18:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i64 0
273 ; AVX-NEXT: [[TMP19:%.*]] = insertelement <8 x i32> [[TMP18]], i32 [[TMP5]], i64 1
274 ; AVX-NEXT: [[TMP20:%.*]] = insertelement <8 x i32> [[TMP19]], i32 [[TMP7]], i64 2
275 ; AVX-NEXT: [[TMP21:%.*]] = insertelement <8 x i32> [[TMP20]], i32 [[TMP9]], i64 3
276 ; AVX-NEXT: [[TMP22:%.*]] = insertelement <8 x i32> [[TMP21]], i32 [[TMP11]], i64 4
277 ; AVX-NEXT: [[TMP23:%.*]] = insertelement <8 x i32> [[TMP22]], i32 [[TMP13]], i64 5
278 ; AVX-NEXT: [[TMP24:%.*]] = insertelement <8 x i32> [[TMP23]], i32 [[TMP15]], i64 6
279 ; AVX-NEXT: [[TMP25:%.*]] = insertelement <8 x i32> [[TMP24]], i32 [[TMP17]], i64 7
280 ; AVX-NEXT: [[TMP26:%.*]] = add <8 x i32> [[TMP25]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
281 ; AVX-NEXT: store <8 x i32> [[TMP26]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
284 ; AVX2-LABEL: @gather_load_3(
285 ; AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
286 ; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
287 ; AVX2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
288 ; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
289 ; AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
290 ; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 60
291 ; AVX2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
292 ; AVX2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 72
293 ; AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
294 ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 36
295 ; AVX2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[TBAA0]]
296 ; AVX2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 24
297 ; AVX2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
298 ; AVX2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 84
299 ; AVX2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !tbaa [[TBAA0]]
300 ; AVX2-NEXT: [[TMP18:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i64 0
301 ; AVX2-NEXT: [[TMP19:%.*]] = insertelement <8 x i32> [[TMP18]], i32 [[TMP5]], i64 1
302 ; AVX2-NEXT: [[TMP20:%.*]] = insertelement <8 x i32> [[TMP19]], i32 [[TMP7]], i64 2
303 ; AVX2-NEXT: [[TMP21:%.*]] = insertelement <8 x i32> [[TMP20]], i32 [[TMP9]], i64 3
304 ; AVX2-NEXT: [[TMP22:%.*]] = insertelement <8 x i32> [[TMP21]], i32 [[TMP11]], i64 4
305 ; AVX2-NEXT: [[TMP23:%.*]] = insertelement <8 x i32> [[TMP22]], i32 [[TMP13]], i64 5
306 ; AVX2-NEXT: [[TMP24:%.*]] = insertelement <8 x i32> [[TMP23]], i32 [[TMP15]], i64 6
307 ; AVX2-NEXT: [[TMP25:%.*]] = insertelement <8 x i32> [[TMP24]], i32 [[TMP17]], i64 7
308 ; AVX2-NEXT: [[TMP26:%.*]] = add <8 x i32> [[TMP25]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
309 ; AVX2-NEXT: store <8 x i32> [[TMP26]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
310 ; AVX2-NEXT: ret void
312 ; AVX512F-LABEL: @gather_load_3(
313 ; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0
314 ; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer
315 ; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
316 ; AVX512F-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison), !tbaa [[TBAA0]]
317 ; AVX512F-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
318 ; AVX512F-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
319 ; AVX512F-NEXT: ret void
321 ; AVX512VL-LABEL: @gather_load_3(
322 ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0
323 ; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer
324 ; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
325 ; AVX512VL-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison), !tbaa [[TBAA0]]
326 ; AVX512VL-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
327 ; AVX512VL-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
328 ; AVX512VL-NEXT: ret void
330 %3 = load i32, ptr %1, align 4, !tbaa !2
332 %5 = getelementptr inbounds i32, ptr %0, i64 1
333 store i32 %4, ptr %0, align 4, !tbaa !2
334 %6 = getelementptr inbounds i32, ptr %1, i64 11
335 %7 = load i32, ptr %6, align 4, !tbaa !2
337 %9 = getelementptr inbounds i32, ptr %0, i64 2
338 store i32 %8, ptr %5, align 4, !tbaa !2
339 %10 = getelementptr inbounds i32, ptr %1, i64 4
340 %11 = load i32, ptr %10, align 4, !tbaa !2
342 %13 = getelementptr inbounds i32, ptr %0, i64 3
343 store i32 %12, ptr %9, align 4, !tbaa !2
344 %14 = getelementptr inbounds i32, ptr %1, i64 15
345 %15 = load i32, ptr %14, align 4, !tbaa !2
347 %17 = getelementptr inbounds i32, ptr %0, i64 4
348 store i32 %16, ptr %13, align 4, !tbaa !2
349 %18 = getelementptr inbounds i32, ptr %1, i64 18
350 %19 = load i32, ptr %18, align 4, !tbaa !2
352 %21 = getelementptr inbounds i32, ptr %0, i64 5
353 store i32 %20, ptr %17, align 4, !tbaa !2
354 %22 = getelementptr inbounds i32, ptr %1, i64 9
355 %23 = load i32, ptr %22, align 4, !tbaa !2
357 %25 = getelementptr inbounds i32, ptr %0, i64 6
358 store i32 %24, ptr %21, align 4, !tbaa !2
359 %26 = getelementptr inbounds i32, ptr %1, i64 6
360 %27 = load i32, ptr %26, align 4, !tbaa !2
362 %29 = getelementptr inbounds i32, ptr %0, i64 7
363 store i32 %28, ptr %25, align 4, !tbaa !2
364 %30 = getelementptr inbounds i32, ptr %1, i64 21
365 %31 = load i32, ptr %30, align 4, !tbaa !2
367 store i32 %32, ptr %29, align 4, !tbaa !2
371 define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture readonly %t1) {
372 ; SSE-LABEL: @gather_load_4(
373 ; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[T0:%.*]], i64 4
374 ; SSE-NEXT: [[T6:%.*]] = getelementptr inbounds i8, ptr [[T1:%.*]], i64 44
375 ; SSE-NEXT: [[T9:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 8
376 ; SSE-NEXT: [[T10:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 16
377 ; SSE-NEXT: [[T13:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 12
378 ; SSE-NEXT: [[T14:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 60
379 ; SSE-NEXT: [[T17:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 16
380 ; SSE-NEXT: [[T18:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 72
381 ; SSE-NEXT: [[T21:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 20
382 ; SSE-NEXT: [[T22:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 36
383 ; SSE-NEXT: [[T25:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 24
384 ; SSE-NEXT: [[T26:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 24
385 ; SSE-NEXT: [[T29:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 28
386 ; SSE-NEXT: [[T30:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 84
387 ; SSE-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
388 ; SSE-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
389 ; SSE-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
390 ; SSE-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]]
391 ; SSE-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]]
392 ; SSE-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]]
393 ; SSE-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]]
394 ; SSE-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]]
395 ; SSE-NEXT: [[T4:%.*]] = add i32 [[T3]], 1
396 ; SSE-NEXT: [[T8:%.*]] = add i32 [[T7]], 2
397 ; SSE-NEXT: [[T12:%.*]] = add i32 [[T11]], 3
398 ; SSE-NEXT: [[T16:%.*]] = add i32 [[T15]], 4
399 ; SSE-NEXT: [[T20:%.*]] = add i32 [[T19]], 1
400 ; SSE-NEXT: [[T24:%.*]] = add i32 [[T23]], 2
401 ; SSE-NEXT: [[T28:%.*]] = add i32 [[T27]], 3
402 ; SSE-NEXT: [[T32:%.*]] = add i32 [[T31]], 4
403 ; SSE-NEXT: store i32 [[T4]], ptr [[T0]], align 4, !tbaa [[TBAA0]]
404 ; SSE-NEXT: store i32 [[T8]], ptr [[T5]], align 4, !tbaa [[TBAA0]]
405 ; SSE-NEXT: store i32 [[T12]], ptr [[T9]], align 4, !tbaa [[TBAA0]]
406 ; SSE-NEXT: store i32 [[T16]], ptr [[T13]], align 4, !tbaa [[TBAA0]]
407 ; SSE-NEXT: store i32 [[T20]], ptr [[T17]], align 4, !tbaa [[TBAA0]]
408 ; SSE-NEXT: store i32 [[T24]], ptr [[T21]], align 4, !tbaa [[TBAA0]]
409 ; SSE-NEXT: store i32 [[T28]], ptr [[T25]], align 4, !tbaa [[TBAA0]]
410 ; SSE-NEXT: store i32 [[T32]], ptr [[T29]], align 4, !tbaa [[TBAA0]]
413 ; AVX-LABEL: @gather_load_4(
414 ; AVX-NEXT: [[T6:%.*]] = getelementptr inbounds i8, ptr [[T1:%.*]], i64 44
415 ; AVX-NEXT: [[T10:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 16
416 ; AVX-NEXT: [[T14:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 60
417 ; AVX-NEXT: [[T18:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 72
418 ; AVX-NEXT: [[T22:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 36
419 ; AVX-NEXT: [[T26:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 24
420 ; AVX-NEXT: [[T30:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 84
421 ; AVX-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
422 ; AVX-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
423 ; AVX-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
424 ; AVX-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]]
425 ; AVX-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]]
426 ; AVX-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]]
427 ; AVX-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]]
428 ; AVX-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]]
429 ; AVX-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[T3]], i64 0
430 ; AVX-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[T7]], i64 1
431 ; AVX-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[T11]], i64 2
432 ; AVX-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[T15]], i64 3
433 ; AVX-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[T19]], i64 4
434 ; AVX-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[T23]], i64 5
435 ; AVX-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T27]], i64 6
436 ; AVX-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[T31]], i64 7
437 ; AVX-NEXT: [[TMP9:%.*]] = add <8 x i32> [[TMP8]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
438 ; AVX-NEXT: store <8 x i32> [[TMP9]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
441 ; AVX2-LABEL: @gather_load_4(
442 ; AVX2-NEXT: [[T6:%.*]] = getelementptr inbounds i8, ptr [[T1:%.*]], i64 44
443 ; AVX2-NEXT: [[T10:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 16
444 ; AVX2-NEXT: [[T14:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 60
445 ; AVX2-NEXT: [[T18:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 72
446 ; AVX2-NEXT: [[T22:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 36
447 ; AVX2-NEXT: [[T26:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 24
448 ; AVX2-NEXT: [[T30:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 84
449 ; AVX2-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
450 ; AVX2-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
451 ; AVX2-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
452 ; AVX2-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]]
453 ; AVX2-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]]
454 ; AVX2-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]]
455 ; AVX2-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]]
456 ; AVX2-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]]
457 ; AVX2-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[T3]], i64 0
458 ; AVX2-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[T7]], i64 1
459 ; AVX2-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[T11]], i64 2
460 ; AVX2-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[T15]], i64 3
461 ; AVX2-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[T19]], i64 4
462 ; AVX2-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[T23]], i64 5
463 ; AVX2-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T27]], i64 6
464 ; AVX2-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[T31]], i64 7
465 ; AVX2-NEXT: [[TMP9:%.*]] = add <8 x i32> [[TMP8]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
466 ; AVX2-NEXT: store <8 x i32> [[TMP9]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
467 ; AVX2-NEXT: ret void
469 ; AVX512F-LABEL: @gather_load_4(
470 ; AVX512F-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0
471 ; AVX512F-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer
472 ; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
473 ; AVX512F-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison), !tbaa [[TBAA0]]
474 ; AVX512F-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
475 ; AVX512F-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
476 ; AVX512F-NEXT: ret void
478 ; AVX512VL-LABEL: @gather_load_4(
479 ; AVX512VL-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0
480 ; AVX512VL-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer
481 ; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
482 ; AVX512VL-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison), !tbaa [[TBAA0]]
483 ; AVX512VL-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
484 ; AVX512VL-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
485 ; AVX512VL-NEXT: ret void
487 %t5 = getelementptr inbounds i32, ptr %t0, i64 1
488 %t6 = getelementptr inbounds i32, ptr %t1, i64 11
489 %t9 = getelementptr inbounds i32, ptr %t0, i64 2
490 %t10 = getelementptr inbounds i32, ptr %t1, i64 4
491 %t13 = getelementptr inbounds i32, ptr %t0, i64 3
492 %t14 = getelementptr inbounds i32, ptr %t1, i64 15
493 %t17 = getelementptr inbounds i32, ptr %t0, i64 4
494 %t18 = getelementptr inbounds i32, ptr %t1, i64 18
495 %t21 = getelementptr inbounds i32, ptr %t0, i64 5
496 %t22 = getelementptr inbounds i32, ptr %t1, i64 9
497 %t25 = getelementptr inbounds i32, ptr %t0, i64 6
498 %t26 = getelementptr inbounds i32, ptr %t1, i64 6
499 %t29 = getelementptr inbounds i32, ptr %t0, i64 7
500 %t30 = getelementptr inbounds i32, ptr %t1, i64 21
502 %t3 = load i32, ptr %t1, align 4, !tbaa !2
503 %t7 = load i32, ptr %t6, align 4, !tbaa !2
504 %t11 = load i32, ptr %t10, align 4, !tbaa !2
505 %t15 = load i32, ptr %t14, align 4, !tbaa !2
506 %t19 = load i32, ptr %t18, align 4, !tbaa !2
507 %t23 = load i32, ptr %t22, align 4, !tbaa !2
508 %t27 = load i32, ptr %t26, align 4, !tbaa !2
509 %t31 = load i32, ptr %t30, align 4, !tbaa !2
513 %t12 = add i32 %t11, 3
514 %t16 = add i32 %t15, 4
515 %t20 = add i32 %t19, 1
516 %t24 = add i32 %t23, 2
517 %t28 = add i32 %t27, 3
518 %t32 = add i32 %t31, 4
520 store i32 %t4, ptr %t0, align 4, !tbaa !2
521 store i32 %t8, ptr %t5, align 4, !tbaa !2
522 store i32 %t12, ptr %t9, align 4, !tbaa !2
523 store i32 %t16, ptr %t13, align 4, !tbaa !2
524 store i32 %t20, ptr %t17, align 4, !tbaa !2
525 store i32 %t24, ptr %t21, align 4, !tbaa !2
526 store i32 %t28, ptr %t25, align 4, !tbaa !2
527 store i32 %t32, ptr %t29, align 4, !tbaa !2
533 define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
534 ; SSE-LABEL: @gather_load_div(
535 ; SSE-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
536 ; SSE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
537 ; SSE-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
538 ; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
539 ; SSE-NEXT: [[TMP7:%.*]] = load float, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
540 ; SSE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 52
541 ; SSE-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
542 ; SSE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
543 ; SSE-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
544 ; SSE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
545 ; SSE-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4, !tbaa [[TBAA0]]
546 ; SSE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 56
547 ; SSE-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
548 ; SSE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 176
549 ; SSE-NEXT: [[TMP17:%.*]] = load float, ptr [[TMP16]], align 4, !tbaa [[TBAA0]]
550 ; SSE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP0:%.*]], i64 16
551 ; SSE-NEXT: [[TMP19:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
552 ; SSE-NEXT: [[TMP20:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP7]], i64 1
553 ; SSE-NEXT: [[TMP21:%.*]] = insertelement <4 x float> [[TMP20]], float [[TMP11]], i64 2
554 ; SSE-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float [[TMP15]], i64 3
555 ; SSE-NEXT: [[TMP23:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0
556 ; SSE-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP23]], float [[TMP9]], i64 1
557 ; SSE-NEXT: [[TMP25:%.*]] = insertelement <4 x float> [[TMP24]], float [[TMP13]], i64 2
558 ; SSE-NEXT: [[TMP26:%.*]] = insertelement <4 x float> [[TMP25]], float [[TMP17]], i64 3
559 ; SSE-NEXT: [[TMP27:%.*]] = fdiv <4 x float> [[TMP22]], [[TMP26]]
560 ; SSE-NEXT: store <4 x float> [[TMP27]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
561 ; SSE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 68
562 ; SSE-NEXT: [[TMP29:%.*]] = load float, ptr [[TMP28]], align 4, !tbaa [[TBAA0]]
563 ; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 132
564 ; SSE-NEXT: [[TMP31:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[TBAA0]]
565 ; SSE-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32
566 ; SSE-NEXT: [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4, !tbaa [[TBAA0]]
567 ; SSE-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 120
568 ; SSE-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[TBAA0]]
569 ; SSE-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
570 ; SSE-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[TBAA0]]
571 ; SSE-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 108
572 ; SSE-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP38]], align 4, !tbaa [[TBAA0]]
573 ; SSE-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 80
574 ; SSE-NEXT: [[TMP41:%.*]] = load float, ptr [[TMP40]], align 4, !tbaa [[TBAA0]]
575 ; SSE-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 92
576 ; SSE-NEXT: [[TMP43:%.*]] = load float, ptr [[TMP42]], align 4, !tbaa [[TBAA0]]
577 ; SSE-NEXT: [[TMP44:%.*]] = insertelement <4 x float> poison, float [[TMP29]], i64 0
578 ; SSE-NEXT: [[TMP45:%.*]] = insertelement <4 x float> [[TMP44]], float [[TMP33]], i64 1
579 ; SSE-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP37]], i64 2
580 ; SSE-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP41]], i64 3
581 ; SSE-NEXT: [[TMP48:%.*]] = insertelement <4 x float> poison, float [[TMP31]], i64 0
582 ; SSE-NEXT: [[TMP49:%.*]] = insertelement <4 x float> [[TMP48]], float [[TMP35]], i64 1
583 ; SSE-NEXT: [[TMP50:%.*]] = insertelement <4 x float> [[TMP49]], float [[TMP39]], i64 2
584 ; SSE-NEXT: [[TMP51:%.*]] = insertelement <4 x float> [[TMP50]], float [[TMP43]], i64 3
585 ; SSE-NEXT: [[TMP52:%.*]] = fdiv <4 x float> [[TMP47]], [[TMP51]]
586 ; SSE-NEXT: store <4 x float> [[TMP52]], ptr [[TMP18]], align 4, !tbaa [[TBAA0]]
589 ; AVX-LABEL: @gather_load_div(
590 ; AVX-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
591 ; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
592 ; AVX-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
593 ; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
594 ; AVX-NEXT: [[TMP7:%.*]] = load float, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
595 ; AVX-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 52
596 ; AVX-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
597 ; AVX-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
598 ; AVX-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
599 ; AVX-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
600 ; AVX-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4, !tbaa [[TBAA0]]
601 ; AVX-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 56
602 ; AVX-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
603 ; AVX-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 176
604 ; AVX-NEXT: [[TMP17:%.*]] = load float, ptr [[TMP16]], align 4, !tbaa [[TBAA0]]
605 ; AVX-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 68
606 ; AVX-NEXT: [[TMP19:%.*]] = load float, ptr [[TMP18]], align 4, !tbaa [[TBAA0]]
607 ; AVX-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 132
608 ; AVX-NEXT: [[TMP21:%.*]] = load float, ptr [[TMP20]], align 4, !tbaa [[TBAA0]]
609 ; AVX-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32
610 ; AVX-NEXT: [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4, !tbaa [[TBAA0]]
611 ; AVX-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 120
612 ; AVX-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP24]], align 4, !tbaa [[TBAA0]]
613 ; AVX-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
614 ; AVX-NEXT: [[TMP27:%.*]] = load float, ptr [[TMP26]], align 4, !tbaa [[TBAA0]]
615 ; AVX-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 108
616 ; AVX-NEXT: [[TMP29:%.*]] = load float, ptr [[TMP28]], align 4, !tbaa [[TBAA0]]
617 ; AVX-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 80
618 ; AVX-NEXT: [[TMP31:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[TBAA0]]
619 ; AVX-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 92
620 ; AVX-NEXT: [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4, !tbaa [[TBAA0]]
621 ; AVX-NEXT: [[TMP34:%.*]] = insertelement <8 x float> poison, float [[TMP3]], i64 0
622 ; AVX-NEXT: [[TMP35:%.*]] = insertelement <8 x float> [[TMP34]], float [[TMP7]], i64 1
623 ; AVX-NEXT: [[TMP36:%.*]] = insertelement <8 x float> [[TMP35]], float [[TMP11]], i64 2
624 ; AVX-NEXT: [[TMP37:%.*]] = insertelement <8 x float> [[TMP36]], float [[TMP15]], i64 3
625 ; AVX-NEXT: [[TMP38:%.*]] = insertelement <8 x float> [[TMP37]], float [[TMP19]], i64 4
626 ; AVX-NEXT: [[TMP39:%.*]] = insertelement <8 x float> [[TMP38]], float [[TMP23]], i64 5
627 ; AVX-NEXT: [[TMP40:%.*]] = insertelement <8 x float> [[TMP39]], float [[TMP27]], i64 6
628 ; AVX-NEXT: [[TMP41:%.*]] = insertelement <8 x float> [[TMP40]], float [[TMP31]], i64 7
629 ; AVX-NEXT: [[TMP42:%.*]] = insertelement <8 x float> poison, float [[TMP5]], i64 0
630 ; AVX-NEXT: [[TMP43:%.*]] = insertelement <8 x float> [[TMP42]], float [[TMP9]], i64 1
631 ; AVX-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP13]], i64 2
632 ; AVX-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[TMP17]], i64 3
633 ; AVX-NEXT: [[TMP46:%.*]] = insertelement <8 x float> [[TMP45]], float [[TMP21]], i64 4
634 ; AVX-NEXT: [[TMP47:%.*]] = insertelement <8 x float> [[TMP46]], float [[TMP25]], i64 5
635 ; AVX-NEXT: [[TMP48:%.*]] = insertelement <8 x float> [[TMP47]], float [[TMP29]], i64 6
636 ; AVX-NEXT: [[TMP49:%.*]] = insertelement <8 x float> [[TMP48]], float [[TMP33]], i64 7
637 ; AVX-NEXT: [[TMP50:%.*]] = fdiv <8 x float> [[TMP41]], [[TMP49]]
638 ; AVX-NEXT: store <8 x float> [[TMP50]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
641 ; AVX2-LABEL: @gather_load_div(
642 ; AVX2-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
643 ; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
644 ; AVX2-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
645 ; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
646 ; AVX2-NEXT: [[TMP7:%.*]] = load float, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
647 ; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 52
648 ; AVX2-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
649 ; AVX2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
650 ; AVX2-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
651 ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
652 ; AVX2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4, !tbaa [[TBAA0]]
653 ; AVX2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 56
654 ; AVX2-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
655 ; AVX2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 176
656 ; AVX2-NEXT: [[TMP17:%.*]] = load float, ptr [[TMP16]], align 4, !tbaa [[TBAA0]]
657 ; AVX2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 68
658 ; AVX2-NEXT: [[TMP19:%.*]] = load float, ptr [[TMP18]], align 4, !tbaa [[TBAA0]]
659 ; AVX2-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 132
660 ; AVX2-NEXT: [[TMP21:%.*]] = load float, ptr [[TMP20]], align 4, !tbaa [[TBAA0]]
661 ; AVX2-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32
662 ; AVX2-NEXT: [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4, !tbaa [[TBAA0]]
663 ; AVX2-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 120
664 ; AVX2-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP24]], align 4, !tbaa [[TBAA0]]
665 ; AVX2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
666 ; AVX2-NEXT: [[TMP27:%.*]] = load float, ptr [[TMP26]], align 4, !tbaa [[TBAA0]]
667 ; AVX2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 108
668 ; AVX2-NEXT: [[TMP29:%.*]] = load float, ptr [[TMP28]], align 4, !tbaa [[TBAA0]]
669 ; AVX2-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 80
670 ; AVX2-NEXT: [[TMP31:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[TBAA0]]
671 ; AVX2-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 92
672 ; AVX2-NEXT: [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4, !tbaa [[TBAA0]]
673 ; AVX2-NEXT: [[TMP34:%.*]] = insertelement <8 x float> poison, float [[TMP3]], i64 0
674 ; AVX2-NEXT: [[TMP35:%.*]] = insertelement <8 x float> [[TMP34]], float [[TMP7]], i64 1
675 ; AVX2-NEXT: [[TMP36:%.*]] = insertelement <8 x float> [[TMP35]], float [[TMP11]], i64 2
676 ; AVX2-NEXT: [[TMP37:%.*]] = insertelement <8 x float> [[TMP36]], float [[TMP15]], i64 3
677 ; AVX2-NEXT: [[TMP38:%.*]] = insertelement <8 x float> [[TMP37]], float [[TMP19]], i64 4
678 ; AVX2-NEXT: [[TMP39:%.*]] = insertelement <8 x float> [[TMP38]], float [[TMP23]], i64 5
679 ; AVX2-NEXT: [[TMP40:%.*]] = insertelement <8 x float> [[TMP39]], float [[TMP27]], i64 6
680 ; AVX2-NEXT: [[TMP41:%.*]] = insertelement <8 x float> [[TMP40]], float [[TMP31]], i64 7
681 ; AVX2-NEXT: [[TMP42:%.*]] = insertelement <8 x float> poison, float [[TMP5]], i64 0
682 ; AVX2-NEXT: [[TMP43:%.*]] = insertelement <8 x float> [[TMP42]], float [[TMP9]], i64 1
683 ; AVX2-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP13]], i64 2
684 ; AVX2-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[TMP17]], i64 3
685 ; AVX2-NEXT: [[TMP46:%.*]] = insertelement <8 x float> [[TMP45]], float [[TMP21]], i64 4
686 ; AVX2-NEXT: [[TMP47:%.*]] = insertelement <8 x float> [[TMP46]], float [[TMP25]], i64 5
687 ; AVX2-NEXT: [[TMP48:%.*]] = insertelement <8 x float> [[TMP47]], float [[TMP29]], i64 6
688 ; AVX2-NEXT: [[TMP49:%.*]] = insertelement <8 x float> [[TMP48]], float [[TMP33]], i64 7
689 ; AVX2-NEXT: [[TMP50:%.*]] = fdiv <8 x float> [[TMP41]], [[TMP49]]
690 ; AVX2-NEXT: store <8 x float> [[TMP50]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
691 ; AVX2-NEXT: ret void
693 ; AVX512F-LABEL: @gather_load_div(
694 ; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0
695 ; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer
696 ; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
697 ; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> <i64 0, i64 10, i64 3, i64 14, i64 17, i64 8, i64 5, i64 20>
698 ; AVX512F-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison), !tbaa [[TBAA0]]
699 ; AVX512F-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison), !tbaa [[TBAA0]]
700 ; AVX512F-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]]
701 ; AVX512F-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
702 ; AVX512F-NEXT: ret void
704 ; AVX512VL-LABEL: @gather_load_div(
705 ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0
706 ; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer
707 ; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
708 ; AVX512VL-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> <i64 0, i64 10, i64 3, i64 14, i64 17, i64 8, i64 5, i64 20>
709 ; AVX512VL-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison), !tbaa [[TBAA0]]
710 ; AVX512VL-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison), !tbaa [[TBAA0]]
711 ; AVX512VL-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]]
712 ; AVX512VL-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
713 ; AVX512VL-NEXT: ret void
715 %3 = load float, ptr %1, align 4, !tbaa !2
716 %4 = getelementptr inbounds float, ptr %1, i64 4
717 %5 = load float, ptr %4, align 4, !tbaa !2
718 %6 = fdiv float %3, %5
719 %7 = getelementptr inbounds float, ptr %0, i64 1
720 store float %6, ptr %0, align 4, !tbaa !2
721 %8 = getelementptr inbounds float, ptr %1, i64 10
722 %9 = load float, ptr %8, align 4, !tbaa !2
723 %10 = getelementptr inbounds float, ptr %1, i64 13
724 %11 = load float, ptr %10, align 4, !tbaa !2
725 %12 = fdiv float %9, %11
726 %13 = getelementptr inbounds float, ptr %0, i64 2
727 store float %12, ptr %7, align 4, !tbaa !2
728 %14 = getelementptr inbounds float, ptr %1, i64 3
729 %15 = load float, ptr %14, align 4, !tbaa !2
730 %16 = getelementptr inbounds float, ptr %1, i64 11
731 %17 = load float, ptr %16, align 4, !tbaa !2
732 %18 = fdiv float %15, %17
733 %19 = getelementptr inbounds float, ptr %0, i64 3
734 store float %18, ptr %13, align 4, !tbaa !2
735 %20 = getelementptr inbounds float, ptr %1, i64 14
736 %21 = load float, ptr %20, align 4, !tbaa !2
737 %22 = getelementptr inbounds float, ptr %1, i64 44
738 %23 = load float, ptr %22, align 4, !tbaa !2
739 %24 = fdiv float %21, %23
740 %25 = getelementptr inbounds float, ptr %0, i64 4
741 store float %24, ptr %19, align 4, !tbaa !2
742 %26 = getelementptr inbounds float, ptr %1, i64 17
743 %27 = load float, ptr %26, align 4, !tbaa !2
744 %28 = getelementptr inbounds float, ptr %1, i64 33
745 %29 = load float, ptr %28, align 4, !tbaa !2
746 %30 = fdiv float %27, %29
747 %31 = getelementptr inbounds float, ptr %0, i64 5
748 store float %30, ptr %25, align 4, !tbaa !2
749 %32 = getelementptr inbounds float, ptr %1, i64 8
750 %33 = load float, ptr %32, align 4, !tbaa !2
751 %34 = getelementptr inbounds float, ptr %1, i64 30
752 %35 = load float, ptr %34, align 4, !tbaa !2
753 %36 = fdiv float %33, %35
754 %37 = getelementptr inbounds float, ptr %0, i64 6
755 store float %36, ptr %31, align 4, !tbaa !2
756 %38 = getelementptr inbounds float, ptr %1, i64 5
757 %39 = load float, ptr %38, align 4, !tbaa !2
758 %40 = getelementptr inbounds float, ptr %1, i64 27
759 %41 = load float, ptr %40, align 4, !tbaa !2
760 %42 = fdiv float %39, %41
761 %43 = getelementptr inbounds float, ptr %0, i64 7
762 store float %42, ptr %37, align 4, !tbaa !2
763 %44 = getelementptr inbounds float, ptr %1, i64 20
764 %45 = load float, ptr %44, align 4, !tbaa !2
765 %46 = getelementptr inbounds float, ptr %1, i64 23
766 %47 = load float, ptr %46, align 4, !tbaa !2
767 %48 = fdiv float %45, %47
768 store float %48, ptr %43, align 4, !tbaa !2
772 !2 = !{!3, !3, i64 0}
773 !3 = !{!"short", !4, i64 0}
774 !4 = !{!"omnipotent char", !5, i64 0}
775 !5 = !{!"Simple C++ TBAA"}