1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s
;; Struct type with an i8 field followed by two i32 fields, and a
;; zero-initialized common global @e of that type (align 4). The
;; redundant_stores_merging tests store to its i32 fields (indices 1 and 2).
4 %structTy = type { i8, i32, i32 }
6 @e = common global %structTy zeroinitializer, align 4
8 ;; Ensure that MergeConsecutiveStores doesn't incorrectly reorder
9 ;; store operations. The first test stores in increasing address
10 ;; order, the second in decreasing -- but in both cases should have
11 ;; the same result in memory in the end.
;; Stores 1 to field 1 of @e, then 123 and afterwards 456 to field 2, so the
;; second store to field 2 overwrites the first. The expected output is a
;; single 64-bit immediate store: 0x1C800000001 carries 1 in its low 32 bits
;; and 456 (0x1C8) in its high 32 bits, i.e. the later value for field 2.
13 define void @redundant_stores_merging() {
14 ; CHECK-LABEL: redundant_stores_merging:
16 ; CHECK-NEXT: movabsq $1958505086977, %rax # imm = 0x1C800000001
17 ; CHECK-NEXT: movq %rax, e+{{.*}}(%rip)
;; Field 1 <- 1.
19 store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4
;; Field 2 <- 123, immediately overwritten by the next store.
20 store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
;; Field 2 <- 456; this is the value that must end up in memory.
21 store i32 456, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
25 ;; This variant tests PR25154.
;; Same three stores as @redundant_stores_merging, but the two stores to
;; field 2 (123, then 456) come first and the store of 1 to field 1 comes
;; last. The expected output is identical: one 64-bit immediate store of
;; 0x1C800000001 (low 32 bits = 1, high 32 bits = 456).
26 define void @redundant_stores_merging_reverse() {
27 ; CHECK-LABEL: redundant_stores_merging_reverse:
29 ; CHECK-NEXT: movabsq $1958505086977, %rax # imm = 0x1C800000001
30 ; CHECK-NEXT: movq %rax, e+{{.*}}(%rip)
;; Field 2 <- 123, immediately overwritten by the next store.
32 store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
;; Field 2 <- 456; this is the value that must end up in memory.
33 store i32 456, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
;; Field 1 <- 1.
34 store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4
;; Zero-initialized 8-byte common global (align 2) written by
;; @overlapping_stores_merging through i16 stores at byte offsets 0, 2 and 3.
38 @b = common global [8 x i8] zeroinitializer, align 2
40 ;; The 2-byte store to offset 3 overlaps the 2-byte store to offset 2;
41 ;; these must not be reordered in MergeConsecutiveStores such that the
42 ;; store to 3 comes first (e.g. by merging the stores to 0 and 2 into
43 ;; a movl, after the store to 3).
;; Performs three i16 stores into @b at byte offsets 2, 3 and 0, in that
;; order. The expected output is a 32-bit store of immediate 1 followed by a
;; 16-bit store of immediate 2 into @b, so the store at offset 3 is emitted
;; after the wider store.
45 define void @overlapping_stores_merging() {
46 ; CHECK-LABEL: overlapping_stores_merging:
48 ; CHECK-NEXT: movl $1, {{.*}}(%rip)
49 ; CHECK-NEXT: movw $2, b+{{.*}}(%rip)
;; i16 0 -> bytes 2..3 of @b.
51 store i16 0, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 2) to i16*), align 2
;; i16 2 -> bytes 3..4 of @b (align 1); shares byte 3 with the previous store.
52 store i16 2, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 3) to i16*), align 1
;; i16 1 -> bytes 0..1 of @b.
53 store i16 1, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 0) to i16*), align 2
;; Reinterprets the <2 x i64> argument %v as <16 x i8>, extracts every byte,
;; and stores byte i to %ptr + i for i = 0..15 with align 1. The expected
;; output is a single unaligned 16-byte vector store of %xmm0 to (%rdi).
57 define void @extract_vector_store_16_consecutive_bytes(<2 x i64> %v, i8* %ptr) #0 {
58 ; CHECK-LABEL: extract_vector_store_16_consecutive_bytes:
60 ; CHECK-NEXT: vmovups %xmm0, (%rdi)
;; View the vector as 16 individual bytes.
62 %bc = bitcast <2 x i64> %v to <16 x i8>
;; Extract each byte lane; %extNN is lane NN.
63 %ext00 = extractelement <16 x i8> %bc, i32 0
64 %ext01 = extractelement <16 x i8> %bc, i32 1
65 %ext02 = extractelement <16 x i8> %bc, i32 2
66 %ext03 = extractelement <16 x i8> %bc, i32 3
67 %ext04 = extractelement <16 x i8> %bc, i32 4
68 %ext05 = extractelement <16 x i8> %bc, i32 5
69 %ext06 = extractelement <16 x i8> %bc, i32 6
70 %ext07 = extractelement <16 x i8> %bc, i32 7
71 %ext08 = extractelement <16 x i8> %bc, i32 8
72 %ext09 = extractelement <16 x i8> %bc, i32 9
73 %ext10 = extractelement <16 x i8> %bc, i32 10
74 %ext11 = extractelement <16 x i8> %bc, i32 11
75 %ext12 = extractelement <16 x i8> %bc, i32 12
76 %ext13 = extractelement <16 x i8> %bc, i32 13
77 %ext14 = extractelement <16 x i8> %bc, i32 14
78 %ext15 = extractelement <16 x i8> %bc, i32 15
;; Compute the destination address of each byte; %gepNN is %ptr + NN.
79 %gep00 = getelementptr inbounds i8, i8* %ptr, i64 0
80 %gep01 = getelementptr inbounds i8, i8* %ptr, i64 1
81 %gep02 = getelementptr inbounds i8, i8* %ptr, i64 2
82 %gep03 = getelementptr inbounds i8, i8* %ptr, i64 3
83 %gep04 = getelementptr inbounds i8, i8* %ptr, i64 4
84 %gep05 = getelementptr inbounds i8, i8* %ptr, i64 5
85 %gep06 = getelementptr inbounds i8, i8* %ptr, i64 6
86 %gep07 = getelementptr inbounds i8, i8* %ptr, i64 7
87 %gep08 = getelementptr inbounds i8, i8* %ptr, i64 8
88 %gep09 = getelementptr inbounds i8, i8* %ptr, i64 9
89 %gep10 = getelementptr inbounds i8, i8* %ptr, i64 10
90 %gep11 = getelementptr inbounds i8, i8* %ptr, i64 11
91 %gep12 = getelementptr inbounds i8, i8* %ptr, i64 12
92 %gep13 = getelementptr inbounds i8, i8* %ptr, i64 13
93 %gep14 = getelementptr inbounds i8, i8* %ptr, i64 14
94 %gep15 = getelementptr inbounds i8, i8* %ptr, i64 15
;; Store lane NN to %ptr + NN, in increasing address order.
95 store i8 %ext00, i8* %gep00, align 1
96 store i8 %ext01, i8* %gep01, align 1
97 store i8 %ext02, i8* %gep02, align 1
98 store i8 %ext03, i8* %gep03, align 1
99 store i8 %ext04, i8* %gep04, align 1
100 store i8 %ext05, i8* %gep05, align 1
101 store i8 %ext06, i8* %gep06, align 1
102 store i8 %ext07, i8* %gep07, align 1
103 store i8 %ext08, i8* %gep08, align 1
104 store i8 %ext09, i8* %gep09, align 1
105 store i8 %ext10, i8* %gep10, align 1
106 store i8 %ext11, i8* %gep11, align 1
107 store i8 %ext12, i8* %gep12, align 1
108 store i8 %ext13, i8* %gep13, align 1
109 store i8 %ext14, i8* %gep14, align 1
110 store i8 %ext15, i8* %gep15, align 1
114 ; PR34217 - https://bugs.llvm.org/show_bug.cgi?id=34217
;; Reinterprets the <4 x i64> argument %v as <32 x i8>, extracts every byte,
;; and stores byte i to %ptr + i for i = 0..31 with align 1. The expected
;; output is a single unaligned 32-byte vector store of %ymm0 to (%rdi),
;; followed by vzeroupper.
116 define void @extract_vector_store_32_consecutive_bytes(<4 x i64> %v, i8* %ptr) #0 {
117 ; CHECK-LABEL: extract_vector_store_32_consecutive_bytes:
119 ; CHECK-NEXT: vmovups %ymm0, (%rdi)
120 ; CHECK-NEXT: vzeroupper
;; View the vector as 32 individual bytes.
122 %bc = bitcast <4 x i64> %v to <32 x i8>
;; Extract each byte lane; %extNN is lane NN.
123 %ext00 = extractelement <32 x i8> %bc, i32 0
124 %ext01 = extractelement <32 x i8> %bc, i32 1
125 %ext02 = extractelement <32 x i8> %bc, i32 2
126 %ext03 = extractelement <32 x i8> %bc, i32 3
127 %ext04 = extractelement <32 x i8> %bc, i32 4
128 %ext05 = extractelement <32 x i8> %bc, i32 5
129 %ext06 = extractelement <32 x i8> %bc, i32 6
130 %ext07 = extractelement <32 x i8> %bc, i32 7
131 %ext08 = extractelement <32 x i8> %bc, i32 8
132 %ext09 = extractelement <32 x i8> %bc, i32 9
133 %ext10 = extractelement <32 x i8> %bc, i32 10
134 %ext11 = extractelement <32 x i8> %bc, i32 11
135 %ext12 = extractelement <32 x i8> %bc, i32 12
136 %ext13 = extractelement <32 x i8> %bc, i32 13
137 %ext14 = extractelement <32 x i8> %bc, i32 14
138 %ext15 = extractelement <32 x i8> %bc, i32 15
139 %ext16 = extractelement <32 x i8> %bc, i32 16
140 %ext17 = extractelement <32 x i8> %bc, i32 17
141 %ext18 = extractelement <32 x i8> %bc, i32 18
142 %ext19 = extractelement <32 x i8> %bc, i32 19
143 %ext20 = extractelement <32 x i8> %bc, i32 20
144 %ext21 = extractelement <32 x i8> %bc, i32 21
145 %ext22 = extractelement <32 x i8> %bc, i32 22
146 %ext23 = extractelement <32 x i8> %bc, i32 23
147 %ext24 = extractelement <32 x i8> %bc, i32 24
148 %ext25 = extractelement <32 x i8> %bc, i32 25
149 %ext26 = extractelement <32 x i8> %bc, i32 26
150 %ext27 = extractelement <32 x i8> %bc, i32 27
151 %ext28 = extractelement <32 x i8> %bc, i32 28
152 %ext29 = extractelement <32 x i8> %bc, i32 29
153 %ext30 = extractelement <32 x i8> %bc, i32 30
154 %ext31 = extractelement <32 x i8> %bc, i32 31
;; Compute the destination address of each byte; %gepNN is %ptr + NN.
155 %gep00 = getelementptr inbounds i8, i8* %ptr, i64 0
156 %gep01 = getelementptr inbounds i8, i8* %ptr, i64 1
157 %gep02 = getelementptr inbounds i8, i8* %ptr, i64 2
158 %gep03 = getelementptr inbounds i8, i8* %ptr, i64 3
159 %gep04 = getelementptr inbounds i8, i8* %ptr, i64 4
160 %gep05 = getelementptr inbounds i8, i8* %ptr, i64 5
161 %gep06 = getelementptr inbounds i8, i8* %ptr, i64 6
162 %gep07 = getelementptr inbounds i8, i8* %ptr, i64 7
163 %gep08 = getelementptr inbounds i8, i8* %ptr, i64 8
164 %gep09 = getelementptr inbounds i8, i8* %ptr, i64 9
165 %gep10 = getelementptr inbounds i8, i8* %ptr, i64 10
166 %gep11 = getelementptr inbounds i8, i8* %ptr, i64 11
167 %gep12 = getelementptr inbounds i8, i8* %ptr, i64 12
168 %gep13 = getelementptr inbounds i8, i8* %ptr, i64 13
169 %gep14 = getelementptr inbounds i8, i8* %ptr, i64 14
170 %gep15 = getelementptr inbounds i8, i8* %ptr, i64 15
171 %gep16 = getelementptr inbounds i8, i8* %ptr, i64 16
172 %gep17 = getelementptr inbounds i8, i8* %ptr, i64 17
173 %gep18 = getelementptr inbounds i8, i8* %ptr, i64 18
174 %gep19 = getelementptr inbounds i8, i8* %ptr, i64 19
175 %gep20 = getelementptr inbounds i8, i8* %ptr, i64 20
176 %gep21 = getelementptr inbounds i8, i8* %ptr, i64 21
177 %gep22 = getelementptr inbounds i8, i8* %ptr, i64 22
178 %gep23 = getelementptr inbounds i8, i8* %ptr, i64 23
179 %gep24 = getelementptr inbounds i8, i8* %ptr, i64 24
180 %gep25 = getelementptr inbounds i8, i8* %ptr, i64 25
181 %gep26 = getelementptr inbounds i8, i8* %ptr, i64 26
182 %gep27 = getelementptr inbounds i8, i8* %ptr, i64 27
183 %gep28 = getelementptr inbounds i8, i8* %ptr, i64 28
184 %gep29 = getelementptr inbounds i8, i8* %ptr, i64 29
185 %gep30 = getelementptr inbounds i8, i8* %ptr, i64 30
186 %gep31 = getelementptr inbounds i8, i8* %ptr, i64 31
;; Store lane NN to %ptr + NN, in increasing address order.
187 store i8 %ext00, i8* %gep00, align 1
188 store i8 %ext01, i8* %gep01, align 1
189 store i8 %ext02, i8* %gep02, align 1
190 store i8 %ext03, i8* %gep03, align 1
191 store i8 %ext04, i8* %gep04, align 1
192 store i8 %ext05, i8* %gep05, align 1
193 store i8 %ext06, i8* %gep06, align 1
194 store i8 %ext07, i8* %gep07, align 1
195 store i8 %ext08, i8* %gep08, align 1
196 store i8 %ext09, i8* %gep09, align 1
197 store i8 %ext10, i8* %gep10, align 1
198 store i8 %ext11, i8* %gep11, align 1
199 store i8 %ext12, i8* %gep12, align 1
200 store i8 %ext13, i8* %gep13, align 1
201 store i8 %ext14, i8* %gep14, align 1
202 store i8 %ext15, i8* %gep15, align 1
203 store i8 %ext16, i8* %gep16, align 1
204 store i8 %ext17, i8* %gep17, align 1
205 store i8 %ext18, i8* %gep18, align 1
206 store i8 %ext19, i8* %gep19, align 1
207 store i8 %ext20, i8* %gep20, align 1
208 store i8 %ext21, i8* %gep21, align 1
209 store i8 %ext22, i8* %gep22, align 1
210 store i8 %ext23, i8* %gep23, align 1
211 store i8 %ext24, i8* %gep24, align 1
212 store i8 %ext25, i8* %gep25, align 1
213 store i8 %ext26, i8* %gep26, align 1
214 store i8 %ext27, i8* %gep27, align 1
215 store i8 %ext28, i8* %gep28, align 1
216 store i8 %ext29, i8* %gep29, align 1
217 store i8 %ext30, i8* %gep30, align 1
218 store i8 %ext31, i8* %gep31, align 1