1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-linux -mcpu=corei7-avx | FileCheck %s
3 ; RUN: opt -instsimplify -disable-output < %s
; AGEP0: splat the scalar %ptr into all four lanes of a pointer vector, then
; apply two chained vector GEPs over i32 with constant index vectors.
; The visible assertions expect one broadcast of the stack argument followed
; by a vpaddd whose other operand is a constant-pool entry (presumably the two
; constant index vectors folded into one byte-offset vector - confirm).
5 define <4 x i32*> @AGEP0(i32* %ptr) nounwind {
8 ; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0
9 ; CHECK-NEXT: vpaddd {{\.LCPI.*}}, %xmm0, %xmm0
; Insert %ptr into lanes 0..3 so every lane holds the same base pointer.
11 %vecinit.i = insertelement <4 x i32*> undef, i32* %ptr, i32 0
12 %vecinit2.i = insertelement <4 x i32*> %vecinit.i, i32* %ptr, i32 1
13 %vecinit4.i = insertelement <4 x i32*> %vecinit2.i, i32* %ptr, i32 2
14 %vecinit6.i = insertelement <4 x i32*> %vecinit4.i, i32* %ptr, i32 3
; First GEP advances each lane by its own constant element index; the second
; GEP advances the result again by another per-lane constant.
15 %A2 = getelementptr i32, <4 x i32*> %vecinit6.i, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
16 %A3 = getelementptr i32, <4 x i32*> %A2, <4 x i32> <i32 10, i32 14, i32 19, i32 233>
; AGEP1: vector GEP over i32 with constant per-lane indices, followed by an
; extract of lane 3 and a load through the extracted pointer.
; Lane 3 uses element index 4 and i32 is 4 bytes wide, so the expected code
; extracts the original lane-3 pointer and loads with a displacement of 16.
20 define i32 @AGEP1(<4 x i32*> %param) nounwind {
23 ; CHECK-NEXT: vextractps $3, %xmm0, %eax
24 ; CHECK-NEXT: movl 16(%eax), %eax
26 %A2 = getelementptr i32, <4 x i32*> %param, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
; Only lane 3 of the GEP result is used.
27 %k = extractelement <4 x i32*> %A2, i32 3
28 %v = load i32, i32* %k
; AGEP2: vector GEP over i32 with a variable index vector %off, followed by an
; extract of lane 3 and a load through the extracted pointer.
; Expected code: shift each index left by 2 (multiply by the 4-byte element
; size), add the base pointers, extract lane 3, then load from it.
32 define i32 @AGEP2(<4 x i32*> %param, <4 x i32> %off) nounwind {
35 ; CHECK-NEXT: vpslld $2, %xmm1, %xmm1
36 ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
37 ; CHECK-NEXT: vpextrd $3, %xmm0, %eax
38 ; CHECK-NEXT: movl (%eax), %eax
40 %A2 = getelementptr i32, <4 x i32*> %param, <4 x i32> %off
; Only lane 3 of the GEP result is used.
41 %k = extractelement <4 x i32*> %A2, i32 3
42 %v = load i32, i32* %k
; AGEP3: vector GEP over i32 with a variable index vector %off, after which
; lane 3 of the resulting pointer vector is overwritten with the pointer %v.
; NOTE(review): %v is defined on a line that is not visible in this view. The
; expected code inserts the value of %esp into lane 3, so %v is presumably a
; stack allocation - confirm against the full file.
46 define <4 x i32*> @AGEP3(<4 x i32*> %param, <4 x i32> %off) nounwind {
49 ; CHECK-NEXT: pushl %eax
50 ; CHECK-NEXT: vpslld $2, %xmm1, %xmm1
51 ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
52 ; CHECK-NEXT: movl %esp, %eax
53 ; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
54 ; CHECK-NEXT: popl %eax
; Indices are scaled by 4 (shift left by 2) and added to the base pointers.
56 %A2 = getelementptr i32, <4 x i32*> %param, <4 x i32> %off
; Replace lane 3 of the computed pointer vector with %v.
58 %k = insertelement <4 x i32*> %A2, i32* %v, i32 3
; AGEP4: vector GEP over i16 with a variable <4 x i32> index vector.
; The element size is 2 bytes, so the expected code doubles each index with a
; self-add instead of a shift before adding the base pointers.
62 define <4 x i16*> @AGEP4(<4 x i16*> %param, <4 x i32> %off) nounwind {
63 ; Multiply offset by two (add it to itself).
64 ; add the base to the offset
67 ; CHECK-NEXT: vpaddd %xmm1, %xmm1, %xmm1
68 ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
70 %A = getelementptr i16, <4 x i16*> %param, <4 x i32> %off
; AGEP5: vector GEP over i8 where the index vector has i8 elements, which are
; narrower than the 32-bit pointers on this target.
; Expected code: a left shift by 24 followed by an arithmetic right shift by 24
; sign-extends the low byte of each 32-bit lane, then the extended indices are
; added to the base pointers. No scaling is expected for 1-byte elements.
74 define <4 x i8*> @AGEP5(<4 x i8*> %param, <4 x i8> %off) nounwind {
77 ; CHECK-NEXT: vpslld $24, %xmm1, %xmm1
78 ; CHECK-NEXT: vpsrad $24, %xmm1, %xmm1
79 ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
81 %A = getelementptr i8, <4 x i8*> %param, <4 x i8> %off
; AGEP6: vector GEP over i8 with a variable <4 x i32> index vector.
; The expected code is a single vector add of the indices to the base pointers.
86 ; The size of each element is 1 byte. No need to multiply by element size.
87 define <4 x i8*> @AGEP6(<4 x i8*> %param, <4 x i32> %off) nounwind {
90 ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
92 %A = getelementptr i8, <4 x i8*> %param, <4 x i32> %off
; AGEP7: vector GEP over i8 where the base is a pointer vector but the index
; %off is a single scalar i32 applied to every lane.
; Expected code: broadcast the scalar index from its stack slot into all four
; lanes, then add it to the base pointers (no scaling for 1-byte elements).
96 define <4 x i8*> @AGEP7(<4 x i8*> %param, i32 %off) nounwind {
99 ; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1
100 ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
102 %A = getelementptr i8, <4 x i8*> %param, i32 %off
; AGEP8: GEP over i16 where the base %param is a single scalar pointer and the
; index %off is a <4 x i32> vector, producing a vector of four pointers.
; Expected code: double each index (2-byte elements), broadcast the scalar base
; pointer from its stack slot into all four lanes, then add the two vectors.
106 define <4 x i16*> @AGEP8(i16* %param, <4 x i32> %off) nounwind {
107 ; Multiply offset by two (add it to itself).
108 ; add the base to the offset
109 ; CHECK-LABEL: AGEP8:
111 ; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
112 ; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1
113 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
115 %A = getelementptr i16, i16* %param, <4 x i32> %off
119 define <64 x i16*> @AGEP9(i16* %param, <64 x i32> %off) nounwind {
120 ; CHECK-LABEL: AGEP9:
122 ; CHECK-NEXT: pushl %ebp
123 ; CHECK-NEXT: movl %esp, %ebp
124 ; CHECK-NEXT: andl $-32, %esp
125 ; CHECK-NEXT: subl $96, %esp
126 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3
127 ; CHECK-NEXT: vpaddd %xmm3, %xmm3, %xmm4
128 ; CHECK-NEXT: vbroadcastss 12(%ebp), %xmm3
129 ; CHECK-NEXT: vpaddd %xmm4, %xmm3, %xmm4
130 ; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
131 ; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0
132 ; CHECK-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
133 ; CHECK-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill
134 ; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm4
135 ; CHECK-NEXT: vpaddd %xmm4, %xmm4, %xmm4
136 ; CHECK-NEXT: vpaddd %xmm4, %xmm3, %xmm4
137 ; CHECK-NEXT: vpaddd %xmm1, %xmm1, %xmm1
138 ; CHECK-NEXT: vpaddd %xmm1, %xmm3, %xmm1
139 ; CHECK-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm0
140 ; CHECK-NEXT: vmovaps %ymm0, (%esp) # 32-byte Spill
141 ; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm4
142 ; CHECK-NEXT: vpaddd %xmm4, %xmm4, %xmm4
143 ; CHECK-NEXT: vpaddd %xmm4, %xmm3, %xmm4
144 ; CHECK-NEXT: vpaddd %xmm2, %xmm2, %xmm2
145 ; CHECK-NEXT: vpaddd %xmm2, %xmm3, %xmm2
146 ; CHECK-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
147 ; CHECK-NEXT: vmovdqa 40(%ebp), %xmm4
148 ; CHECK-NEXT: vmovdqa 56(%ebp), %xmm5
149 ; CHECK-NEXT: vpaddd %xmm5, %xmm5, %xmm5
150 ; CHECK-NEXT: vpaddd %xmm5, %xmm3, %xmm5
151 ; CHECK-NEXT: vpaddd %xmm4, %xmm4, %xmm4
152 ; CHECK-NEXT: vpaddd %xmm4, %xmm3, %xmm4
153 ; CHECK-NEXT: vinsertf128 $1, %xmm5, %ymm4, %ymm4
154 ; CHECK-NEXT: vmovdqa 72(%ebp), %xmm5
155 ; CHECK-NEXT: vmovdqa 88(%ebp), %xmm6
156 ; CHECK-NEXT: vpaddd %xmm6, %xmm6, %xmm6
157 ; CHECK-NEXT: vpaddd %xmm6, %xmm3, %xmm6
158 ; CHECK-NEXT: vpaddd %xmm5, %xmm5, %xmm5
159 ; CHECK-NEXT: vpaddd %xmm5, %xmm3, %xmm5
160 ; CHECK-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
161 ; CHECK-NEXT: vmovdqa 104(%ebp), %xmm6
162 ; CHECK-NEXT: vmovdqa 120(%ebp), %xmm7
163 ; CHECK-NEXT: vpaddd %xmm7, %xmm7, %xmm7
164 ; CHECK-NEXT: vpaddd %xmm7, %xmm3, %xmm7
165 ; CHECK-NEXT: vpaddd %xmm6, %xmm6, %xmm6
166 ; CHECK-NEXT: vpaddd %xmm6, %xmm3, %xmm6
167 ; CHECK-NEXT: vinsertf128 $1, %xmm7, %ymm6, %ymm6
168 ; CHECK-NEXT: vmovdqa 152(%ebp), %xmm7
169 ; CHECK-NEXT: vpaddd %xmm7, %xmm7, %xmm7
170 ; CHECK-NEXT: vpaddd %xmm7, %xmm3, %xmm7
171 ; CHECK-NEXT: vmovdqa 136(%ebp), %xmm0
172 ; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
173 ; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0
174 ; CHECK-NEXT: vinsertf128 $1, %xmm7, %ymm0, %ymm0
175 ; CHECK-NEXT: vmovdqa 184(%ebp), %xmm7
176 ; CHECK-NEXT: vpaddd %xmm7, %xmm7, %xmm7
177 ; CHECK-NEXT: vpaddd %xmm7, %xmm3, %xmm7
178 ; CHECK-NEXT: vmovdqa 168(%ebp), %xmm1
179 ; CHECK-NEXT: vpaddd %xmm1, %xmm1, %xmm1
180 ; CHECK-NEXT: vpaddd %xmm1, %xmm3, %xmm1
181 ; CHECK-NEXT: vinsertf128 $1, %xmm7, %ymm1, %ymm1
182 ; CHECK-NEXT: movl 8(%ebp), %eax
183 ; CHECK-NEXT: vmovaps %ymm1, 224(%eax)
184 ; CHECK-NEXT: vmovaps %ymm0, 192(%eax)
185 ; CHECK-NEXT: vmovaps %ymm6, 160(%eax)
186 ; CHECK-NEXT: vmovaps %ymm5, 128(%eax)
187 ; CHECK-NEXT: vmovaps %ymm4, 96(%eax)
188 ; CHECK-NEXT: vmovaps %ymm2, 64(%eax)
189 ; CHECK-NEXT: vmovaps (%esp), %ymm0 # 32-byte Reload
190 ; CHECK-NEXT: vmovaps %ymm0, 32(%eax)
191 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 # 32-byte Reload
192 ; CHECK-NEXT: vmovaps %ymm0, (%eax)
193 ; CHECK-NEXT: movl %ebp, %esp
194 ; CHECK-NEXT: popl %ebp
195 ; CHECK-NEXT: vzeroupper
196 ; CHECK-NEXT: retl $4
197 %A = getelementptr i16, i16* %param, <64 x i32> %off