1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+sse2 | FileCheck %s --check-prefixes=SSE
3 ; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx | FileCheck %s --check-prefixes=AVX
4 ; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
5 ; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F
6 ; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512VL
; gather_load: reads four i32 values from %1 at element indices 0, 11, 4 and 1,
; packs them into a <4 x i32> with an insertelement chain, adds <1, 2, 3, 4>
; (nsw) and writes the result to %0 with a single vector store.
; Every configuration listed at the top of the file (+sse2, +avx, +avx2,
; +avx512f, +avx512vl) expects the same output shape: four scalar loads at byte
; offsets 0, 44, 16 and 4 feeding an insertelement chain, one vector add and
; one vector store. No masked gather is expected for this function.
8 define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
9 ; SSE-LABEL: @gather_load(
10 ; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
11 ; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
12 ; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
13 ; SSE-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
14 ; SSE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
15 ; SSE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
16 ; SSE-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
17 ; SSE-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
18 ; SSE-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
19 ; SSE-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
20 ; SSE-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
21 ; SSE-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
22 ; SSE-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
25 ; AVX-LABEL: @gather_load(
26 ; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
27 ; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
28 ; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
29 ; AVX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
30 ; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
31 ; AVX-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
32 ; AVX-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
33 ; AVX-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
34 ; AVX-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
35 ; AVX-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
36 ; AVX-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
37 ; AVX-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
38 ; AVX-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
41 ; AVX2-LABEL: @gather_load(
42 ; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
43 ; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
44 ; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
45 ; AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
46 ; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
47 ; AVX2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
48 ; AVX2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
49 ; AVX2-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
50 ; AVX2-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
51 ; AVX2-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
52 ; AVX2-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
53 ; AVX2-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
54 ; AVX2-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
57 ; AVX512F-LABEL: @gather_load(
58 ; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
59 ; AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
60 ; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
61 ; AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
62 ; AVX512F-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
63 ; AVX512F-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
64 ; AVX512F-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
65 ; AVX512F-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
66 ; AVX512F-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
67 ; AVX512F-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
68 ; AVX512F-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
69 ; AVX512F-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
70 ; AVX512F-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
71 ; AVX512F-NEXT: ret void
73 ; AVX512VL-LABEL: @gather_load(
74 ; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
75 ; AVX512VL-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
76 ; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
77 ; AVX512VL-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
78 ; AVX512VL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
79 ; AVX512VL-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
80 ; AVX512VL-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
81 ; AVX512VL-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
82 ; AVX512VL-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
83 ; AVX512VL-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
84 ; AVX512VL-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
85 ; AVX512VL-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
86 ; AVX512VL-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
87 ; AVX512VL-NEXT: ret void
; Input IR. The GEPs on %0 (%5, %8, %11) have no users in the lines shown
; here, and none of the expected outputs above contains a GEP on the
; destination pointer.
89 %3 = getelementptr inbounds i32, ptr %1, i64 1
90 %4 = load i32, ptr %1, align 4, !tbaa !2
91 %5 = getelementptr inbounds i32, ptr %0, i64 1
92 %6 = getelementptr inbounds i32, ptr %1, i64 11
93 %7 = load i32, ptr %6, align 4, !tbaa !2
94 %8 = getelementptr inbounds i32, ptr %0, i64 2
95 %9 = getelementptr inbounds i32, ptr %1, i64 4
96 %10 = load i32, ptr %9, align 4, !tbaa !2
97 %11 = getelementptr inbounds i32, ptr %0, i64 3
98 %12 = load i32, ptr %3, align 4, !tbaa !2
; The vector is already assembled by hand in the input; lane order is the load
; order (%1 indices 0, 11, 4, 1).
99 %13 = insertelement <4 x i32> undef, i32 %4, i32 0
100 %14 = insertelement <4 x i32> %13, i32 %7, i32 1
101 %15 = insertelement <4 x i32> %14, i32 %10, i32 2
102 %16 = insertelement <4 x i32> %15, i32 %12, i32 3
103 %17 = add nsw <4 x i32> %16, <i32 1, i32 2, i32 3, i32 4>
104 store <4 x i32> %17, ptr %0, align 4, !tbaa !2
; gather_load_2: fully scalar input. Loads four i32 values from %1 at element
; indices 1, 10, 3 and 5, adds 1, 2, 3 and 4 (nsw) respectively, and stores the
; results to the four consecutive i32 slots starting at %0.
; Expected output differs by configuration:
;   - the +sse2 run keeps the scalar load/add/store sequence;
;   - the +avx, +avx2, +avx512f and +avx512vl runs expect four scalar loads
;     packed with an insertelement chain, one <4 x i32> add and one
;     <4 x i32> store.
108 define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
109 ; SSE-LABEL: @gather_load_2(
110 ; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
111 ; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
112 ; SSE-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP4]], 1
113 ; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP0:%.*]], i64 4
114 ; SSE-NEXT: store i32 [[TMP5]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
115 ; SSE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
116 ; SSE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
117 ; SSE-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP8]], 2
118 ; SSE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
119 ; SSE-NEXT: store i32 [[TMP9]], ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
120 ; SSE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
121 ; SSE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
122 ; SSE-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], 3
123 ; SSE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12
124 ; SSE-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
125 ; SSE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
126 ; SSE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
127 ; SSE-NEXT: [[TMP17:%.*]] = add nsw i32 [[TMP16]], 4
128 ; SSE-NEXT: store i32 [[TMP17]], ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
131 ; AVX-LABEL: @gather_load_2(
132 ; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
133 ; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
134 ; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
135 ; AVX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
136 ; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
137 ; AVX-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
138 ; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
139 ; AVX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
140 ; AVX-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
141 ; AVX-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1
142 ; AVX-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i64 2
143 ; AVX-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3
144 ; AVX-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4>
145 ; AVX-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
148 ; AVX2-LABEL: @gather_load_2(
149 ; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
150 ; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
151 ; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
152 ; AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
153 ; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
154 ; AVX2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
155 ; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
156 ; AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
157 ; AVX2-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
158 ; AVX2-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1
159 ; AVX2-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i64 2
160 ; AVX2-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3
161 ; AVX2-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4>
162 ; AVX2-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
163 ; AVX2-NEXT: ret void
165 ; AVX512F-LABEL: @gather_load_2(
166 ; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
167 ; AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
168 ; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
169 ; AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
170 ; AVX512F-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
171 ; AVX512F-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
172 ; AVX512F-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
173 ; AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
174 ; AVX512F-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
175 ; AVX512F-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1
176 ; AVX512F-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i64 2
177 ; AVX512F-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3
178 ; AVX512F-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4>
179 ; AVX512F-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
180 ; AVX512F-NEXT: ret void
182 ; AVX512VL-LABEL: @gather_load_2(
183 ; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
184 ; AVX512VL-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
185 ; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
186 ; AVX512VL-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
187 ; AVX512VL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
188 ; AVX512VL-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
189 ; AVX512VL-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
190 ; AVX512VL-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
191 ; AVX512VL-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
192 ; AVX512VL-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1
193 ; AVX512VL-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i64 2
194 ; AVX512VL-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3
195 ; AVX512VL-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4>
196 ; AVX512VL-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
197 ; AVX512VL-NEXT: ret void
; Input IR: four interleaved load / add / store groups. Each destination GEP is
; computed one group before the store that uses it (%6 feeds the second store,
; %10 the third, %14 the fourth).
199 %3 = getelementptr inbounds i32, ptr %1, i64 1
200 %4 = load i32, ptr %3, align 4, !tbaa !2
201 %5 = add nsw i32 %4, 1
202 %6 = getelementptr inbounds i32, ptr %0, i64 1
203 store i32 %5, ptr %0, align 4, !tbaa !2
204 %7 = getelementptr inbounds i32, ptr %1, i64 10
205 %8 = load i32, ptr %7, align 4, !tbaa !2
206 %9 = add nsw i32 %8, 2
207 %10 = getelementptr inbounds i32, ptr %0, i64 2
208 store i32 %9, ptr %6, align 4, !tbaa !2
209 %11 = getelementptr inbounds i32, ptr %1, i64 3
210 %12 = load i32, ptr %11, align 4, !tbaa !2
211 %13 = add nsw i32 %12, 3
212 %14 = getelementptr inbounds i32, ptr %0, i64 3
213 store i32 %13, ptr %10, align 4, !tbaa !2
214 %15 = getelementptr inbounds i32, ptr %1, i64 5
215 %16 = load i32, ptr %15, align 4, !tbaa !2
216 %17 = add nsw i32 %16, 4
217 store i32 %17, ptr %14, align 4, !tbaa !2
; gather_load_3: eight-lane variant of the scalar gather pattern. Loads i32
; values from %1 at element indices 0, 11, 4, 15, 18, 9, 6 and 21 and stores
; one value per lane to the eight consecutive i32 slots starting at %0.
; Expected output differs by configuration:
;   - the +sse2 run keeps eight scalar load/add/store groups;
;   - the +avx and +avx2 runs expect eight scalar loads packed with an
;     insertelement chain, one <8 x i32> add and one <8 x i32> store;
;   - the +avx512f and +avx512vl runs expect a splatted base pointer, a vector
;     GEP with the eight indices, a call to @llvm.masked.gather.v8i32.v8p0
;     with an all-true mask, one <8 x i32> add and one <8 x i32> store.
222 define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
223 ; SSE-LABEL: @gather_load_3(
224 ; SSE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
225 ; SSE-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 1
226 ; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0:%.*]], i64 4
227 ; SSE-NEXT: store i32 [[TMP4]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
228 ; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
229 ; SSE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
230 ; SSE-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 2
231 ; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
232 ; SSE-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
233 ; SSE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
234 ; SSE-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
235 ; SSE-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 3
236 ; SSE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12
237 ; SSE-NEXT: store i32 [[TMP12]], ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
238 ; SSE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 60
239 ; SSE-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
240 ; SSE-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], 4
241 ; SSE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
242 ; SSE-NEXT: store i32 [[TMP16]], ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
243 ; SSE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 72
244 ; SSE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !tbaa [[TBAA0]]
245 ; SSE-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 1
246 ; SSE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 20
247 ; SSE-NEXT: store i32 [[TMP20]], ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
248 ; SSE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 36
249 ; SSE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !tbaa [[TBAA0]]
250 ; SSE-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], 2
251 ; SSE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 24
252 ; SSE-NEXT: store i32 [[TMP24]], ptr [[TMP21]], align 4, !tbaa [[TBAA0]]
253 ; SSE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 24
254 ; SSE-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !tbaa [[TBAA0]]
255 ; SSE-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 3
256 ; SSE-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 28
257 ; SSE-NEXT: store i32 [[TMP28]], ptr [[TMP25]], align 4, !tbaa [[TBAA0]]
258 ; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 84
259 ; SSE-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4, !tbaa [[TBAA0]]
260 ; SSE-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], 4
261 ; SSE-NEXT: store i32 [[TMP32]], ptr [[TMP29]], align 4, !tbaa [[TBAA0]]
264 ; AVX-LABEL: @gather_load_3(
265 ; AVX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
266 ; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
267 ; AVX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
268 ; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
269 ; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
270 ; AVX-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 60
271 ; AVX-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
272 ; AVX-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 72
273 ; AVX-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
274 ; AVX-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 36
275 ; AVX-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[TBAA0]]
276 ; AVX-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 24
277 ; AVX-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
278 ; AVX-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 84
279 ; AVX-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !tbaa [[TBAA0]]
280 ; AVX-NEXT: [[TMP18:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i64 0
281 ; AVX-NEXT: [[TMP19:%.*]] = insertelement <8 x i32> [[TMP18]], i32 [[TMP5]], i64 1
282 ; AVX-NEXT: [[TMP20:%.*]] = insertelement <8 x i32> [[TMP19]], i32 [[TMP7]], i64 2
283 ; AVX-NEXT: [[TMP21:%.*]] = insertelement <8 x i32> [[TMP20]], i32 [[TMP9]], i64 3
284 ; AVX-NEXT: [[TMP22:%.*]] = insertelement <8 x i32> [[TMP21]], i32 [[TMP11]], i64 4
285 ; AVX-NEXT: [[TMP23:%.*]] = insertelement <8 x i32> [[TMP22]], i32 [[TMP13]], i64 5
286 ; AVX-NEXT: [[TMP24:%.*]] = insertelement <8 x i32> [[TMP23]], i32 [[TMP15]], i64 6
287 ; AVX-NEXT: [[TMP25:%.*]] = insertelement <8 x i32> [[TMP24]], i32 [[TMP17]], i64 7
288 ; AVX-NEXT: [[TMP26:%.*]] = add <8 x i32> [[TMP25]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
289 ; AVX-NEXT: store <8 x i32> [[TMP26]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
292 ; AVX2-LABEL: @gather_load_3(
293 ; AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
294 ; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
295 ; AVX2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
296 ; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
297 ; AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
298 ; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 60
299 ; AVX2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
300 ; AVX2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 72
301 ; AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
302 ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 36
303 ; AVX2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[TBAA0]]
304 ; AVX2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 24
305 ; AVX2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
306 ; AVX2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 84
307 ; AVX2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !tbaa [[TBAA0]]
308 ; AVX2-NEXT: [[TMP18:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i64 0
309 ; AVX2-NEXT: [[TMP19:%.*]] = insertelement <8 x i32> [[TMP18]], i32 [[TMP5]], i64 1
310 ; AVX2-NEXT: [[TMP20:%.*]] = insertelement <8 x i32> [[TMP19]], i32 [[TMP7]], i64 2
311 ; AVX2-NEXT: [[TMP21:%.*]] = insertelement <8 x i32> [[TMP20]], i32 [[TMP9]], i64 3
312 ; AVX2-NEXT: [[TMP22:%.*]] = insertelement <8 x i32> [[TMP21]], i32 [[TMP11]], i64 4
313 ; AVX2-NEXT: [[TMP23:%.*]] = insertelement <8 x i32> [[TMP22]], i32 [[TMP13]], i64 5
314 ; AVX2-NEXT: [[TMP24:%.*]] = insertelement <8 x i32> [[TMP23]], i32 [[TMP15]], i64 6
315 ; AVX2-NEXT: [[TMP25:%.*]] = insertelement <8 x i32> [[TMP24]], i32 [[TMP17]], i64 7
316 ; AVX2-NEXT: [[TMP26:%.*]] = add <8 x i32> [[TMP25]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
317 ; AVX2-NEXT: store <8 x i32> [[TMP26]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
318 ; AVX2-NEXT: ret void
320 ; AVX512F-LABEL: @gather_load_3(
321 ; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0
322 ; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer
323 ; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
324 ; AVX512F-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]]
325 ; AVX512F-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
326 ; AVX512F-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
327 ; AVX512F-NEXT: ret void
329 ; AVX512VL-LABEL: @gather_load_3(
330 ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0
331 ; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer
332 ; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
333 ; AVX512VL-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]]
334 ; AVX512VL-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
335 ; AVX512VL-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
336 ; AVX512VL-NEXT: ret void
; Input IR: eight interleaved load / store groups. The stored values (%4, %8,
; %12, %16, %20, %24, %28, %32) are not defined in the lines shown here;
; judging by the expected output above they are presumably `add i32` of each
; loaded value with 1, 2, 3, 4, 1, 2, 3, 4 - TODO confirm against the full file.
338 %3 = load i32, ptr %1, align 4, !tbaa !2
340 %5 = getelementptr inbounds i32, ptr %0, i64 1
341 store i32 %4, ptr %0, align 4, !tbaa !2
342 %6 = getelementptr inbounds i32, ptr %1, i64 11
343 %7 = load i32, ptr %6, align 4, !tbaa !2
345 %9 = getelementptr inbounds i32, ptr %0, i64 2
346 store i32 %8, ptr %5, align 4, !tbaa !2
347 %10 = getelementptr inbounds i32, ptr %1, i64 4
348 %11 = load i32, ptr %10, align 4, !tbaa !2
350 %13 = getelementptr inbounds i32, ptr %0, i64 3
351 store i32 %12, ptr %9, align 4, !tbaa !2
352 %14 = getelementptr inbounds i32, ptr %1, i64 15
353 %15 = load i32, ptr %14, align 4, !tbaa !2
355 %17 = getelementptr inbounds i32, ptr %0, i64 4
356 store i32 %16, ptr %13, align 4, !tbaa !2
357 %18 = getelementptr inbounds i32, ptr %1, i64 18
358 %19 = load i32, ptr %18, align 4, !tbaa !2
360 %21 = getelementptr inbounds i32, ptr %0, i64 5
361 store i32 %20, ptr %17, align 4, !tbaa !2
362 %22 = getelementptr inbounds i32, ptr %1, i64 9
363 %23 = load i32, ptr %22, align 4, !tbaa !2
365 %25 = getelementptr inbounds i32, ptr %0, i64 6
366 store i32 %24, ptr %21, align 4, !tbaa !2
367 %26 = getelementptr inbounds i32, ptr %1, i64 6
368 %27 = load i32, ptr %26, align 4, !tbaa !2
370 %29 = getelementptr inbounds i32, ptr %0, i64 7
371 store i32 %28, ptr %25, align 4, !tbaa !2
372 %30 = getelementptr inbounds i32, ptr %1, i64 21
373 %31 = load i32, ptr %30, align 4, !tbaa !2
375 store i32 %32, ptr %29, align 4, !tbaa !2
379 define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture readonly %t1) {
380 ; SSE-LABEL: @gather_load_4(
381 ; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[T0:%.*]], i64 4
382 ; SSE-NEXT: [[T6:%.*]] = getelementptr inbounds i8, ptr [[T1:%.*]], i64 44
383 ; SSE-NEXT: [[T9:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 8
384 ; SSE-NEXT: [[T10:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 16
385 ; SSE-NEXT: [[T13:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 12
386 ; SSE-NEXT: [[T14:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 60
387 ; SSE-NEXT: [[T17:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 16
388 ; SSE-NEXT: [[T18:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 72
389 ; SSE-NEXT: [[T21:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 20
390 ; SSE-NEXT: [[T22:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 36
391 ; SSE-NEXT: [[T25:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 24
392 ; SSE-NEXT: [[T26:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 24
393 ; SSE-NEXT: [[T29:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 28
394 ; SSE-NEXT: [[T30:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 84
395 ; SSE-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
396 ; SSE-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
397 ; SSE-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
398 ; SSE-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]]
399 ; SSE-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]]
400 ; SSE-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]]
401 ; SSE-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]]
402 ; SSE-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]]
403 ; SSE-NEXT: [[T4:%.*]] = add i32 [[T3]], 1
404 ; SSE-NEXT: [[T8:%.*]] = add i32 [[T7]], 2
405 ; SSE-NEXT: [[T12:%.*]] = add i32 [[T11]], 3
406 ; SSE-NEXT: [[T16:%.*]] = add i32 [[T15]], 4
407 ; SSE-NEXT: [[T20:%.*]] = add i32 [[T19]], 1
408 ; SSE-NEXT: [[T24:%.*]] = add i32 [[T23]], 2
409 ; SSE-NEXT: [[T28:%.*]] = add i32 [[T27]], 3
410 ; SSE-NEXT: [[T32:%.*]] = add i32 [[T31]], 4
411 ; SSE-NEXT: store i32 [[T4]], ptr [[T0]], align 4, !tbaa [[TBAA0]]
412 ; SSE-NEXT: store i32 [[T8]], ptr [[T5]], align 4, !tbaa [[TBAA0]]
413 ; SSE-NEXT: store i32 [[T12]], ptr [[T9]], align 4, !tbaa [[TBAA0]]
414 ; SSE-NEXT: store i32 [[T16]], ptr [[T13]], align 4, !tbaa [[TBAA0]]
415 ; SSE-NEXT: store i32 [[T20]], ptr [[T17]], align 4, !tbaa [[TBAA0]]
416 ; SSE-NEXT: store i32 [[T24]], ptr [[T21]], align 4, !tbaa [[TBAA0]]
417 ; SSE-NEXT: store i32 [[T28]], ptr [[T25]], align 4, !tbaa [[TBAA0]]
418 ; SSE-NEXT: store i32 [[T32]], ptr [[T29]], align 4, !tbaa [[TBAA0]]
421 ; AVX-LABEL: @gather_load_4(
422 ; AVX-NEXT: [[T6:%.*]] = getelementptr inbounds i8, ptr [[T1:%.*]], i64 44
423 ; AVX-NEXT: [[T10:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 16
424 ; AVX-NEXT: [[T14:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 60
425 ; AVX-NEXT: [[T18:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 72
426 ; AVX-NEXT: [[T22:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 36
427 ; AVX-NEXT: [[T26:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 24
428 ; AVX-NEXT: [[T30:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 84
429 ; AVX-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
430 ; AVX-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
431 ; AVX-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
432 ; AVX-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]]
433 ; AVX-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]]
434 ; AVX-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]]
435 ; AVX-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]]
436 ; AVX-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]]
437 ; AVX-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[T3]], i64 0
438 ; AVX-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[T7]], i64 1
439 ; AVX-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[T11]], i64 2
440 ; AVX-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[T15]], i64 3
441 ; AVX-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[T19]], i64 4
442 ; AVX-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[T23]], i64 5
443 ; AVX-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T27]], i64 6
444 ; AVX-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[T31]], i64 7
445 ; AVX-NEXT: [[TMP9:%.*]] = add <8 x i32> [[TMP8]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
446 ; AVX-NEXT: store <8 x i32> [[TMP9]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
449 ; AVX2-LABEL: @gather_load_4(
450 ; AVX2-NEXT: [[T6:%.*]] = getelementptr inbounds i8, ptr [[T1:%.*]], i64 44
451 ; AVX2-NEXT: [[T10:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 16
452 ; AVX2-NEXT: [[T14:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 60
453 ; AVX2-NEXT: [[T18:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 72
454 ; AVX2-NEXT: [[T22:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 36
455 ; AVX2-NEXT: [[T26:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 24
456 ; AVX2-NEXT: [[T30:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 84
457 ; AVX2-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
458 ; AVX2-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
459 ; AVX2-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
460 ; AVX2-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]]
461 ; AVX2-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]]
462 ; AVX2-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]]
463 ; AVX2-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]]
464 ; AVX2-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]]
465 ; AVX2-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[T3]], i64 0
466 ; AVX2-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[T7]], i64 1
467 ; AVX2-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[T11]], i64 2
468 ; AVX2-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[T15]], i64 3
469 ; AVX2-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[T19]], i64 4
470 ; AVX2-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[T23]], i64 5
471 ; AVX2-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T27]], i64 6
472 ; AVX2-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[T31]], i64 7
473 ; AVX2-NEXT: [[TMP9:%.*]] = add <8 x i32> [[TMP8]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
474 ; AVX2-NEXT: store <8 x i32> [[TMP9]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
475 ; AVX2-NEXT: ret void
477 ; AVX512F-LABEL: @gather_load_4(
478 ; AVX512F-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0
479 ; AVX512F-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer
480 ; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
481 ; AVX512F-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]]
482 ; AVX512F-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
483 ; AVX512F-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
484 ; AVX512F-NEXT: ret void
486 ; AVX512VL-LABEL: @gather_load_4(
487 ; AVX512VL-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0
488 ; AVX512VL-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer
489 ; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
490 ; AVX512VL-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]]
491 ; AVX512VL-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
492 ; AVX512VL-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
493 ; AVX512VL-NEXT: ret void
495 %t5 = getelementptr inbounds i32, ptr %t0, i64 1
496 %t6 = getelementptr inbounds i32, ptr %t1, i64 11
497 %t9 = getelementptr inbounds i32, ptr %t0, i64 2
498 %t10 = getelementptr inbounds i32, ptr %t1, i64 4
499 %t13 = getelementptr inbounds i32, ptr %t0, i64 3
500 %t14 = getelementptr inbounds i32, ptr %t1, i64 15
501 %t17 = getelementptr inbounds i32, ptr %t0, i64 4
502 %t18 = getelementptr inbounds i32, ptr %t1, i64 18
503 %t21 = getelementptr inbounds i32, ptr %t0, i64 5
504 %t22 = getelementptr inbounds i32, ptr %t1, i64 9
505 %t25 = getelementptr inbounds i32, ptr %t0, i64 6
506 %t26 = getelementptr inbounds i32, ptr %t1, i64 6
507 %t29 = getelementptr inbounds i32, ptr %t0, i64 7
508 %t30 = getelementptr inbounds i32, ptr %t1, i64 21
510 %t3 = load i32, ptr %t1, align 4, !tbaa !2
511 %t7 = load i32, ptr %t6, align 4, !tbaa !2
512 %t11 = load i32, ptr %t10, align 4, !tbaa !2
513 %t15 = load i32, ptr %t14, align 4, !tbaa !2
514 %t19 = load i32, ptr %t18, align 4, !tbaa !2
515 %t23 = load i32, ptr %t22, align 4, !tbaa !2
516 %t27 = load i32, ptr %t26, align 4, !tbaa !2
517 %t31 = load i32, ptr %t30, align 4, !tbaa !2
521 %t12 = add i32 %t11, 3
522 %t16 = add i32 %t15, 4
523 %t20 = add i32 %t19, 1
524 %t24 = add i32 %t23, 2
525 %t28 = add i32 %t27, 3
526 %t32 = add i32 %t31, 4
528 store i32 %t4, ptr %t0, align 4, !tbaa !2
529 store i32 %t8, ptr %t5, align 4, !tbaa !2
530 store i32 %t12, ptr %t9, align 4, !tbaa !2
531 store i32 %t16, ptr %t13, align 4, !tbaa !2
532 store i32 %t20, ptr %t17, align 4, !tbaa !2
533 store i32 %t24, ptr %t21, align 4, !tbaa !2
534 store i32 %t28, ptr %t25, align 4, !tbaa !2
535 store i32 %t32, ptr %t29, align 4, !tbaa !2
; gather_load_div: eight independent scalar float divisions. Both operands of
; every division are loaded from scattered (non-consecutive) element offsets of
; the readonly, noalias pointer %1, and the eight quotients are stored to the
; consecutive float slots 0..7 of the noalias pointer %0.
;   dividend element offsets in %1, lanes 0..7 = 0, 10, 3, 14, 17, 8, 5, 20
;   divisor  element offsets in %1, lanes 0..7 = 4, 13, 11, 44, 33, 30, 27, 23
; Expected output shape per RUN configuration, as recorded by the autogenerated
; assertions below:
;   -mattr=+sse2              two <4 x float> fdiv + store halves
;   -mattr=+avx / +avx2       one <8 x float> fdiv built from scalar and
;                             <2 x float> loads via insertelement/shufflevector
;   -mattr=+avx512f/+avx512vl two llvm.masked.gather calls feeding one
;                             <8 x float> fdiv
; Note on the scalar body: the destination pointer for lane k+1 is computed
; immediately before the store of lane k, so each store uses the getelementptr
; emitted in the preceding lane group.
541 define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
542 ; SSE-LABEL: @gather_load_div(
543 ; SSE-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
544 ; SSE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
545 ; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 52
546 ; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
547 ; SSE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 176
548 ; SSE-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
549 ; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP0:%.*]], i64 16
550 ; SSE-NEXT: [[TMP10:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
551 ; SSE-NEXT: [[TMP11:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
552 ; SSE-NEXT: [[TMP12:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
553 ; SSE-NEXT: [[TMP13:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
554 ; SSE-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
555 ; SSE-NEXT: [[TMP15:%.*]] = shufflevector <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x i32> <i32 0, i32 4, i32 poison, i32 poison>
556 ; SSE-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
557 ; SSE-NEXT: [[TMP17:%.*]] = shufflevector <4 x float> [[TMP15]], <4 x float> [[TMP16]], <4 x i32> <i32 0, i32 1, i32 4, i32 poison>
558 ; SSE-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
559 ; SSE-NEXT: [[TMP19:%.*]] = shufflevector <4 x float> [[TMP17]], <4 x float> [[TMP18]], <4 x i32> <i32 0, i32 1, i32 2, i32 5>
560 ; SSE-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> [[TMP12]], <4 x i32> <i32 1, i32 2, i32 poison, i32 poison>
561 ; SSE-NEXT: [[TMP21:%.*]] = shufflevector <4 x float> [[TMP20]], <4 x float> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 5, i32 poison>
562 ; SSE-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float [[TMP8]], i64 3
563 ; SSE-NEXT: [[TMP23:%.*]] = fdiv <4 x float> [[TMP19]], [[TMP22]]
564 ; SSE-NEXT: store <4 x float> [[TMP23]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
565 ; SSE-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 68
566 ; SSE-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP24]], align 4, !tbaa [[TBAA0]]
567 ; SSE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 132
568 ; SSE-NEXT: [[TMP27:%.*]] = load float, ptr [[TMP26]], align 4, !tbaa [[TBAA0]]
569 ; SSE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32
570 ; SSE-NEXT: [[TMP29:%.*]] = load float, ptr [[TMP28]], align 4, !tbaa [[TBAA0]]
571 ; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 120
572 ; SSE-NEXT: [[TMP31:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[TBAA0]]
573 ; SSE-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
574 ; SSE-NEXT: [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4, !tbaa [[TBAA0]]
575 ; SSE-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 108
576 ; SSE-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[TBAA0]]
577 ; SSE-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 80
578 ; SSE-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[TBAA0]]
579 ; SSE-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 92
580 ; SSE-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP38]], align 4, !tbaa [[TBAA0]]
581 ; SSE-NEXT: [[TMP40:%.*]] = insertelement <4 x float> poison, float [[TMP25]], i64 0
582 ; SSE-NEXT: [[TMP41:%.*]] = insertelement <4 x float> [[TMP40]], float [[TMP29]], i64 1
583 ; SSE-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP33]], i64 2
584 ; SSE-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[TMP42]], float [[TMP37]], i64 3
585 ; SSE-NEXT: [[TMP44:%.*]] = insertelement <4 x float> poison, float [[TMP27]], i64 0
586 ; SSE-NEXT: [[TMP45:%.*]] = insertelement <4 x float> [[TMP44]], float [[TMP31]], i64 1
587 ; SSE-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP35]], i64 2
588 ; SSE-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP39]], i64 3
589 ; SSE-NEXT: [[TMP48:%.*]] = fdiv <4 x float> [[TMP43]], [[TMP47]]
590 ; SSE-NEXT: store <4 x float> [[TMP48]], ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
593 ; AVX-LABEL: @gather_load_div(
594 ; AVX-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
595 ; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
596 ; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 52
597 ; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
598 ; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 176
599 ; AVX-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
600 ; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 68
601 ; AVX-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
602 ; AVX-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 132
603 ; AVX-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
604 ; AVX-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32
605 ; AVX-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
606 ; AVX-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 120
607 ; AVX-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
608 ; AVX-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
609 ; AVX-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
610 ; AVX-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 108
611 ; AVX-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[TBAA0]]
612 ; AVX-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 80
613 ; AVX-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[TBAA0]]
614 ; AVX-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 92
615 ; AVX-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[TBAA0]]
616 ; AVX-NEXT: [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
617 ; AVX-NEXT: [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
618 ; AVX-NEXT: [[TMP27:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
619 ; AVX-NEXT: [[TMP28:%.*]] = insertelement <8 x float> poison, float [[TMP3]], i64 0
620 ; AVX-NEXT: [[TMP29:%.*]] = shufflevector <2 x float> [[TMP26]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
621 ; AVX-NEXT: [[TMP30:%.*]] = shufflevector <8 x float> [[TMP28]], <8 x float> [[TMP29]], <8 x i32> <i32 0, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
622 ; AVX-NEXT: [[TMP31:%.*]] = shufflevector <2 x float> [[TMP25]], <2 x float> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
623 ; AVX-NEXT: [[TMP32:%.*]] = shufflevector <8 x float> [[TMP30]], <8 x float> [[TMP31]], <8 x i32> <i32 0, i32 1, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
624 ; AVX-NEXT: [[TMP33:%.*]] = shufflevector <2 x float> [[TMP27]], <2 x float> poison, <8 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
625 ; AVX-NEXT: [[TMP34:%.*]] = shufflevector <8 x float> [[TMP32]], <8 x float> [[TMP33]], <8 x i32> <i32 0, i32 1, i32 2, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
626 ; AVX-NEXT: [[TMP35:%.*]] = insertelement <8 x float> [[TMP34]], float [[TMP10]], i64 4
627 ; AVX-NEXT: [[TMP36:%.*]] = insertelement <8 x float> [[TMP35]], float [[TMP14]], i64 5
628 ; AVX-NEXT: [[TMP37:%.*]] = insertelement <8 x float> [[TMP36]], float [[TMP18]], i64 6
629 ; AVX-NEXT: [[TMP38:%.*]] = insertelement <8 x float> [[TMP37]], float [[TMP22]], i64 7
630 ; AVX-NEXT: [[TMP39:%.*]] = shufflevector <2 x float> [[TMP25]], <2 x float> [[TMP27]], <8 x i32> <i32 1, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
631 ; AVX-NEXT: [[TMP40:%.*]] = shufflevector <8 x float> [[TMP39]], <8 x float> [[TMP29]], <8 x i32> <i32 0, i32 1, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
632 ; AVX-NEXT: [[TMP41:%.*]] = insertelement <8 x float> [[TMP40]], float [[TMP8]], i64 3
633 ; AVX-NEXT: [[TMP42:%.*]] = insertelement <8 x float> [[TMP41]], float [[TMP12]], i64 4
634 ; AVX-NEXT: [[TMP43:%.*]] = insertelement <8 x float> [[TMP42]], float [[TMP16]], i64 5
635 ; AVX-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP20]], i64 6
636 ; AVX-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[TMP24]], i64 7
637 ; AVX-NEXT: [[TMP46:%.*]] = fdiv <8 x float> [[TMP38]], [[TMP45]]
638 ; AVX-NEXT: store <8 x float> [[TMP46]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
641 ; AVX2-LABEL: @gather_load_div(
642 ; AVX2-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
643 ; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
644 ; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 52
645 ; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
646 ; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 176
647 ; AVX2-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
648 ; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 68
649 ; AVX2-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
650 ; AVX2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 132
651 ; AVX2-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
652 ; AVX2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32
653 ; AVX2-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
654 ; AVX2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 120
655 ; AVX2-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
656 ; AVX2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
657 ; AVX2-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
658 ; AVX2-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 108
659 ; AVX2-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[TBAA0]]
660 ; AVX2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 80
661 ; AVX2-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[TBAA0]]
662 ; AVX2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 92
663 ; AVX2-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[TBAA0]]
664 ; AVX2-NEXT: [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
665 ; AVX2-NEXT: [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
666 ; AVX2-NEXT: [[TMP27:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
667 ; AVX2-NEXT: [[TMP28:%.*]] = insertelement <8 x float> poison, float [[TMP3]], i64 0
668 ; AVX2-NEXT: [[TMP29:%.*]] = shufflevector <2 x float> [[TMP26]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
669 ; AVX2-NEXT: [[TMP30:%.*]] = shufflevector <8 x float> [[TMP28]], <8 x float> [[TMP29]], <8 x i32> <i32 0, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
670 ; AVX2-NEXT: [[TMP31:%.*]] = shufflevector <2 x float> [[TMP25]], <2 x float> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
671 ; AVX2-NEXT: [[TMP32:%.*]] = shufflevector <8 x float> [[TMP30]], <8 x float> [[TMP31]], <8 x i32> <i32 0, i32 1, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
672 ; AVX2-NEXT: [[TMP33:%.*]] = shufflevector <2 x float> [[TMP27]], <2 x float> poison, <8 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
673 ; AVX2-NEXT: [[TMP34:%.*]] = shufflevector <8 x float> [[TMP32]], <8 x float> [[TMP33]], <8 x i32> <i32 0, i32 1, i32 2, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
674 ; AVX2-NEXT: [[TMP35:%.*]] = insertelement <8 x float> [[TMP34]], float [[TMP10]], i64 4
675 ; AVX2-NEXT: [[TMP36:%.*]] = insertelement <8 x float> [[TMP35]], float [[TMP14]], i64 5
676 ; AVX2-NEXT: [[TMP37:%.*]] = insertelement <8 x float> [[TMP36]], float [[TMP18]], i64 6
677 ; AVX2-NEXT: [[TMP38:%.*]] = insertelement <8 x float> [[TMP37]], float [[TMP22]], i64 7
678 ; AVX2-NEXT: [[TMP39:%.*]] = shufflevector <2 x float> [[TMP25]], <2 x float> [[TMP27]], <8 x i32> <i32 1, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
679 ; AVX2-NEXT: [[TMP40:%.*]] = shufflevector <8 x float> [[TMP39]], <8 x float> [[TMP29]], <8 x i32> <i32 0, i32 1, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
680 ; AVX2-NEXT: [[TMP41:%.*]] = insertelement <8 x float> [[TMP40]], float [[TMP8]], i64 3
681 ; AVX2-NEXT: [[TMP42:%.*]] = insertelement <8 x float> [[TMP41]], float [[TMP12]], i64 4
682 ; AVX2-NEXT: [[TMP43:%.*]] = insertelement <8 x float> [[TMP42]], float [[TMP16]], i64 5
683 ; AVX2-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP20]], i64 6
684 ; AVX2-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[TMP24]], i64 7
685 ; AVX2-NEXT: [[TMP46:%.*]] = fdiv <8 x float> [[TMP38]], [[TMP45]]
686 ; AVX2-NEXT: store <8 x float> [[TMP46]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
687 ; AVX2-NEXT: ret void
689 ; AVX512F-LABEL: @gather_load_div(
690 ; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0
691 ; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer
692 ; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
693 ; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> <i64 0, i64 10, i64 3, i64 14, i64 17, i64 8, i64 5, i64 20>
694 ; AVX512F-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]]
695 ; AVX512F-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]]
696 ; AVX512F-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]]
697 ; AVX512F-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
698 ; AVX512F-NEXT: ret void
700 ; AVX512VL-LABEL: @gather_load_div(
701 ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0
702 ; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer
703 ; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
704 ; AVX512VL-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> <i64 0, i64 10, i64 3, i64 14, i64 17, i64 8, i64 5, i64 20>
705 ; AVX512VL-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]]
706 ; AVX512VL-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]]
707 ; AVX512VL-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]]
708 ; AVX512VL-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
709 ; AVX512VL-NEXT: ret void
; lane 0 -- %0[0] = %1[0] / %1[4]
711 %3 = load float, ptr %1, align 4, !tbaa !2
712 %4 = getelementptr inbounds float, ptr %1, i64 4
713 %5 = load float, ptr %4, align 4, !tbaa !2
714 %6 = fdiv float %3, %5
715 %7 = getelementptr inbounds float, ptr %0, i64 1
716 store float %6, ptr %0, align 4, !tbaa !2
; lane 1 -- %0[1] = %1[10] / %1[13]
717 %8 = getelementptr inbounds float, ptr %1, i64 10
718 %9 = load float, ptr %8, align 4, !tbaa !2
719 %10 = getelementptr inbounds float, ptr %1, i64 13
720 %11 = load float, ptr %10, align 4, !tbaa !2
721 %12 = fdiv float %9, %11
722 %13 = getelementptr inbounds float, ptr %0, i64 2
723 store float %12, ptr %7, align 4, !tbaa !2
; lane 2 -- %0[2] = %1[3] / %1[11]
724 %14 = getelementptr inbounds float, ptr %1, i64 3
725 %15 = load float, ptr %14, align 4, !tbaa !2
726 %16 = getelementptr inbounds float, ptr %1, i64 11
727 %17 = load float, ptr %16, align 4, !tbaa !2
728 %18 = fdiv float %15, %17
729 %19 = getelementptr inbounds float, ptr %0, i64 3
730 store float %18, ptr %13, align 4, !tbaa !2
; lane 3 -- %0[3] = %1[14] / %1[44]
731 %20 = getelementptr inbounds float, ptr %1, i64 14
732 %21 = load float, ptr %20, align 4, !tbaa !2
733 %22 = getelementptr inbounds float, ptr %1, i64 44
734 %23 = load float, ptr %22, align 4, !tbaa !2
735 %24 = fdiv float %21, %23
736 %25 = getelementptr inbounds float, ptr %0, i64 4
737 store float %24, ptr %19, align 4, !tbaa !2
; lane 4 -- %0[4] = %1[17] / %1[33]
738 %26 = getelementptr inbounds float, ptr %1, i64 17
739 %27 = load float, ptr %26, align 4, !tbaa !2
740 %28 = getelementptr inbounds float, ptr %1, i64 33
741 %29 = load float, ptr %28, align 4, !tbaa !2
742 %30 = fdiv float %27, %29
743 %31 = getelementptr inbounds float, ptr %0, i64 5
744 store float %30, ptr %25, align 4, !tbaa !2
; lane 5 -- %0[5] = %1[8] / %1[30]
745 %32 = getelementptr inbounds float, ptr %1, i64 8
746 %33 = load float, ptr %32, align 4, !tbaa !2
747 %34 = getelementptr inbounds float, ptr %1, i64 30
748 %35 = load float, ptr %34, align 4, !tbaa !2
749 %36 = fdiv float %33, %35
750 %37 = getelementptr inbounds float, ptr %0, i64 6
751 store float %36, ptr %31, align 4, !tbaa !2
; lane 6 -- %0[6] = %1[5] / %1[27]
752 %38 = getelementptr inbounds float, ptr %1, i64 5
753 %39 = load float, ptr %38, align 4, !tbaa !2
754 %40 = getelementptr inbounds float, ptr %1, i64 27
755 %41 = load float, ptr %40, align 4, !tbaa !2
756 %42 = fdiv float %39, %41
757 %43 = getelementptr inbounds float, ptr %0, i64 7
758 store float %42, ptr %37, align 4, !tbaa !2
; lane 7 -- %0[7] = %1[20] / %1[23]
759 %44 = getelementptr inbounds float, ptr %1, i64 20
760 %45 = load float, ptr %44, align 4, !tbaa !2
761 %46 = getelementptr inbounds float, ptr %1, i64 23
762 %47 = load float, ptr %46, align 4, !tbaa !2
763 %48 = fdiv float %45, %47
764 store float %48, ptr %43, align 4, !tbaa !2
; TBAA metadata referenced as `!tbaa !2` by the scalar loads and stores above.
;   !2  access tag: base type !3, access type !3, offset 0
;   !3  type node named "short", parent !4, offset 0
;   !4  type node named "omnipotent char", parent !5, offset 0
;   !5  root node named "Simple C++ TBAA"
; NOTE(review): the type node is named "short" although the visible accesses
; that carry this tag are i32 and float; presumably the name is only a label
; for the purposes of this test -- confirm before changing it, since the
; assertions only match the tag through the [[TBAA0]] pattern.
768 !2 = !{!3, !3, i64 0}
769 !3 = !{!"short", !4, i64 0}
770 !4 = !{!"omnipotent char", !5, i64 0}
771 !5 = !{!"Simple C++ TBAA"}