1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
3 ; RUN: llc < %s -mtriple=x86_64-linux --x86-disable-avoid-SFB -verify-machineinstrs | FileCheck %s --check-prefix=DISABLED
4 ; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX2
5 ; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX512
; Module-level preamble: records the C file this IR came from and fixes the
; data layout and target triple (x86-64 Linux) for every function below.
7 ; ModuleID = '../testSFB/testOverlapBlocks.c'
8 source_filename = "../testSFB/testOverlapBlocks.c"
9 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
10 target triple = "x86_64-unknown-linux-gnu"
; test_overlap_1: stores that overlap the source of two 16-byte memcpys.
;   - i32 7 is stored at A-8, then 16 bytes are copied from A-16 to A.
;   - sext(x) is stored as i64 at A-9 and at A-16, and i8 0 at A-1, then
;     16 bytes are copied from A-16 to A+16.
; With the default pipeline (CHECK, CHECK-AVX2, CHECK-AVX512) each copy is
; expected as a sequence of narrower GPR load/store pairs (8+4+4 bytes, then
; 8+4+2+1+1 bytes). With --x86-disable-avoid-SFB (DISABLED) each copy stays a
; single movups load/store pair. Every prefix finishes with a retq check.
12 ; Function Attrs: nounwind uwtable
13 define dso_local void @test_overlap_1(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
14 ; CHECK-LABEL: test_overlap_1:
15 ; CHECK: # %bb.0: # %entry
16 ; CHECK-NEXT: movl $7, -8(%rdi)
17 ; CHECK-NEXT: movq -16(%rdi), %rax
18 ; CHECK-NEXT: movq %rax, (%rdi)
19 ; CHECK-NEXT: movl -8(%rdi), %eax
20 ; CHECK-NEXT: movl %eax, 8(%rdi)
21 ; CHECK-NEXT: movl -4(%rdi), %eax
22 ; CHECK-NEXT: movl %eax, 12(%rdi)
23 ; CHECK-NEXT: movslq %esi, %rax
24 ; CHECK-NEXT: movq %rax, -9(%rdi)
25 ; CHECK-NEXT: movq %rax, -16(%rdi)
26 ; CHECK-NEXT: movb $0, -1(%rdi)
27 ; CHECK-NEXT: movq -16(%rdi), %rax
28 ; CHECK-NEXT: movq %rax, 16(%rdi)
29 ; CHECK-NEXT: movl -8(%rdi), %eax
30 ; CHECK-NEXT: movl %eax, 24(%rdi)
31 ; CHECK-NEXT: movzwl -4(%rdi), %eax
32 ; CHECK-NEXT: movw %ax, 28(%rdi)
33 ; CHECK-NEXT: movb -2(%rdi), %al
34 ; CHECK-NEXT: movb %al, 30(%rdi)
35 ; CHECK-NEXT: movb -1(%rdi), %al
36 ; CHECK-NEXT: movb %al, 31(%rdi)
37 ; CHECK-NEXT: retq
39 ; DISABLED-LABEL: test_overlap_1:
40 ; DISABLED: # %bb.0: # %entry
41 ; DISABLED-NEXT: movl $7, -8(%rdi)
42 ; DISABLED-NEXT: movups -16(%rdi), %xmm0
43 ; DISABLED-NEXT: movups %xmm0, (%rdi)
44 ; DISABLED-NEXT: movslq %esi, %rax
45 ; DISABLED-NEXT: movq %rax, -9(%rdi)
46 ; DISABLED-NEXT: movq %rax, -16(%rdi)
47 ; DISABLED-NEXT: movb $0, -1(%rdi)
48 ; DISABLED-NEXT: movups -16(%rdi), %xmm0
49 ; DISABLED-NEXT: movups %xmm0, 16(%rdi)
50 ; DISABLED-NEXT: retq
52 ; CHECK-AVX2-LABEL: test_overlap_1:
53 ; CHECK-AVX2: # %bb.0: # %entry
54 ; CHECK-AVX2-NEXT: movl $7, -8(%rdi)
55 ; CHECK-AVX2-NEXT: movq -16(%rdi), %rax
56 ; CHECK-AVX2-NEXT: movq %rax, (%rdi)
57 ; CHECK-AVX2-NEXT: movl -8(%rdi), %eax
58 ; CHECK-AVX2-NEXT: movl %eax, 8(%rdi)
59 ; CHECK-AVX2-NEXT: movl -4(%rdi), %eax
60 ; CHECK-AVX2-NEXT: movl %eax, 12(%rdi)
61 ; CHECK-AVX2-NEXT: movslq %esi, %rax
62 ; CHECK-AVX2-NEXT: movq %rax, -9(%rdi)
63 ; CHECK-AVX2-NEXT: movq %rax, -16(%rdi)
64 ; CHECK-AVX2-NEXT: movb $0, -1(%rdi)
65 ; CHECK-AVX2-NEXT: movq -16(%rdi), %rax
66 ; CHECK-AVX2-NEXT: movq %rax, 16(%rdi)
67 ; CHECK-AVX2-NEXT: movl -8(%rdi), %eax
68 ; CHECK-AVX2-NEXT: movl %eax, 24(%rdi)
69 ; CHECK-AVX2-NEXT: movzwl -4(%rdi), %eax
70 ; CHECK-AVX2-NEXT: movw %ax, 28(%rdi)
71 ; CHECK-AVX2-NEXT: movb -2(%rdi), %al
72 ; CHECK-AVX2-NEXT: movb %al, 30(%rdi)
73 ; CHECK-AVX2-NEXT: movb -1(%rdi), %al
74 ; CHECK-AVX2-NEXT: movb %al, 31(%rdi)
75 ; CHECK-AVX2-NEXT: retq
77 ; CHECK-AVX512-LABEL: test_overlap_1:
78 ; CHECK-AVX512: # %bb.0: # %entry
79 ; CHECK-AVX512-NEXT: movl $7, -8(%rdi)
80 ; CHECK-AVX512-NEXT: movq -16(%rdi), %rax
81 ; CHECK-AVX512-NEXT: movq %rax, (%rdi)
82 ; CHECK-AVX512-NEXT: movl -8(%rdi), %eax
83 ; CHECK-AVX512-NEXT: movl %eax, 8(%rdi)
84 ; CHECK-AVX512-NEXT: movl -4(%rdi), %eax
85 ; CHECK-AVX512-NEXT: movl %eax, 12(%rdi)
86 ; CHECK-AVX512-NEXT: movslq %esi, %rax
87 ; CHECK-AVX512-NEXT: movq %rax, -9(%rdi)
88 ; CHECK-AVX512-NEXT: movq %rax, -16(%rdi)
89 ; CHECK-AVX512-NEXT: movb $0, -1(%rdi)
90 ; CHECK-AVX512-NEXT: movq -16(%rdi), %rax
91 ; CHECK-AVX512-NEXT: movq %rax, 16(%rdi)
92 ; CHECK-AVX512-NEXT: movl -8(%rdi), %eax
93 ; CHECK-AVX512-NEXT: movl %eax, 24(%rdi)
94 ; CHECK-AVX512-NEXT: movzwl -4(%rdi), %eax
95 ; CHECK-AVX512-NEXT: movw %ax, 28(%rdi)
96 ; CHECK-AVX512-NEXT: movb -2(%rdi), %al
97 ; CHECK-AVX512-NEXT: movb %al, 30(%rdi)
98 ; CHECK-AVX512-NEXT: movb -1(%rdi), %al
99 ; CHECK-AVX512-NEXT: movb %al, 31(%rdi)
100 ; CHECK-AVX512-NEXT: retq
; The block must be named "entry": every check above matches "# %entry".
101 entry:
102 %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
103 %add.ptr1 = getelementptr inbounds i8, i8* %A, i64 -8
104 %0 = bitcast i8* %add.ptr1 to i32*
105 store i32 7, i32* %0, align 4
106 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
107 %conv = sext i32 %x to i64
108 %add.ptr2 = getelementptr inbounds i8, i8* %A, i64 -9
109 %1 = bitcast i8* %add.ptr2 to i64*
110 store i64 %conv, i64* %1, align 8
111 %2 = bitcast i8* %add.ptr to i64*
112 store i64 %conv, i64* %2, align 8
113 %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 -1
114 store i8 0, i8* %add.ptr5, align 1
115 %add.ptr6 = getelementptr inbounds i8, i8* %A, i64 16
116 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr6, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
117 ret void
118 }
; Declaration of the memcpy intrinsic called by every test_overlap_* function.
; Operands as declared: write-only destination pointer, read-only source
; pointer, i64 byte count (always 16 in this file) and an i1 flag (always
; false in this file; per the LLVM LangRef it is the "is volatile" flag).
120 ; Function Attrs: argmemonly nounwind
121 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
; test_overlap_2: stores that overlap the source of two 16-byte memcpys.
;   - sext(x) is stored as i64 at A-16, then 16 bytes are copied from A-16
;     to A.
;   - sext(x) is stored as i64 at A-8 and i32 7 at A-12, then 16 bytes are
;     copied from A-16 to A+16.
; With the default pipeline (CHECK, CHECK-AVX2, CHECK-AVX512) the copies are
; expected as GPR load/store pairs of 8+8 bytes, then 4+4+8 bytes. With
; --x86-disable-avoid-SFB (DISABLED) each copy stays a single movups
; load/store pair. Every prefix finishes with a retq check.
123 ; Function Attrs: nounwind uwtable
124 define dso_local void @test_overlap_2(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
125 ; CHECK-LABEL: test_overlap_2:
126 ; CHECK: # %bb.0: # %entry
127 ; CHECK-NEXT: movslq %esi, %rax
128 ; CHECK-NEXT: movq %rax, -16(%rdi)
129 ; CHECK-NEXT: movq -16(%rdi), %rcx
130 ; CHECK-NEXT: movq %rcx, (%rdi)
131 ; CHECK-NEXT: movq -8(%rdi), %rcx
132 ; CHECK-NEXT: movq %rcx, 8(%rdi)
133 ; CHECK-NEXT: movq %rax, -8(%rdi)
134 ; CHECK-NEXT: movl $7, -12(%rdi)
135 ; CHECK-NEXT: movl -16(%rdi), %eax
136 ; CHECK-NEXT: movl %eax, 16(%rdi)
137 ; CHECK-NEXT: movl -12(%rdi), %eax
138 ; CHECK-NEXT: movl %eax, 20(%rdi)
139 ; CHECK-NEXT: movq -8(%rdi), %rax
140 ; CHECK-NEXT: movq %rax, 24(%rdi)
141 ; CHECK-NEXT: retq
143 ; DISABLED-LABEL: test_overlap_2:
144 ; DISABLED: # %bb.0: # %entry
145 ; DISABLED-NEXT: movslq %esi, %rax
146 ; DISABLED-NEXT: movq %rax, -16(%rdi)
147 ; DISABLED-NEXT: movups -16(%rdi), %xmm0
148 ; DISABLED-NEXT: movups %xmm0, (%rdi)
149 ; DISABLED-NEXT: movq %rax, -8(%rdi)
150 ; DISABLED-NEXT: movl $7, -12(%rdi)
151 ; DISABLED-NEXT: movups -16(%rdi), %xmm0
152 ; DISABLED-NEXT: movups %xmm0, 16(%rdi)
153 ; DISABLED-NEXT: retq
155 ; CHECK-AVX2-LABEL: test_overlap_2:
156 ; CHECK-AVX2: # %bb.0: # %entry
157 ; CHECK-AVX2-NEXT: movslq %esi, %rax
158 ; CHECK-AVX2-NEXT: movq %rax, -16(%rdi)
159 ; CHECK-AVX2-NEXT: movq -16(%rdi), %rcx
160 ; CHECK-AVX2-NEXT: movq %rcx, (%rdi)
161 ; CHECK-AVX2-NEXT: movq -8(%rdi), %rcx
162 ; CHECK-AVX2-NEXT: movq %rcx, 8(%rdi)
163 ; CHECK-AVX2-NEXT: movq %rax, -8(%rdi)
164 ; CHECK-AVX2-NEXT: movl $7, -12(%rdi)
165 ; CHECK-AVX2-NEXT: movl -16(%rdi), %eax
166 ; CHECK-AVX2-NEXT: movl %eax, 16(%rdi)
167 ; CHECK-AVX2-NEXT: movl -12(%rdi), %eax
168 ; CHECK-AVX2-NEXT: movl %eax, 20(%rdi)
169 ; CHECK-AVX2-NEXT: movq -8(%rdi), %rax
170 ; CHECK-AVX2-NEXT: movq %rax, 24(%rdi)
171 ; CHECK-AVX2-NEXT: retq
173 ; CHECK-AVX512-LABEL: test_overlap_2:
174 ; CHECK-AVX512: # %bb.0: # %entry
175 ; CHECK-AVX512-NEXT: movslq %esi, %rax
176 ; CHECK-AVX512-NEXT: movq %rax, -16(%rdi)
177 ; CHECK-AVX512-NEXT: movq -16(%rdi), %rcx
178 ; CHECK-AVX512-NEXT: movq %rcx, (%rdi)
179 ; CHECK-AVX512-NEXT: movq -8(%rdi), %rcx
180 ; CHECK-AVX512-NEXT: movq %rcx, 8(%rdi)
181 ; CHECK-AVX512-NEXT: movq %rax, -8(%rdi)
182 ; CHECK-AVX512-NEXT: movl $7, -12(%rdi)
183 ; CHECK-AVX512-NEXT: movl -16(%rdi), %eax
184 ; CHECK-AVX512-NEXT: movl %eax, 16(%rdi)
185 ; CHECK-AVX512-NEXT: movl -12(%rdi), %eax
186 ; CHECK-AVX512-NEXT: movl %eax, 20(%rdi)
187 ; CHECK-AVX512-NEXT: movq -8(%rdi), %rax
188 ; CHECK-AVX512-NEXT: movq %rax, 24(%rdi)
189 ; CHECK-AVX512-NEXT: retq
; The block must be named "entry": every check above matches "# %entry".
190 entry:
191 %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
192 %conv = sext i32 %x to i64
193 %0 = bitcast i8* %add.ptr to i64*
194 store i64 %conv, i64* %0, align 8
195 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
196 %add.ptr3 = getelementptr inbounds i8, i8* %A, i64 -8
197 %1 = bitcast i8* %add.ptr3 to i64*
198 store i64 %conv, i64* %1, align 8
199 %add.ptr4 = getelementptr inbounds i8, i8* %A, i64 -12
200 %2 = bitcast i8* %add.ptr4 to i32*
201 store i32 7, i32* %2, align 4
202 %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 16
203 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr5, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
204 ret void
205 }
; test_overlap_3: stores that overlap the source of two 16-byte memcpys.
;   - i32 7 is stored at A-10, then 16 bytes are copied from A-16 to A.
;   - sext(x) is stored as i64 at A-9 and at A-16, and i8 0 at A-1, then
;     16 bytes are copied from A-16 to A+16.
; With the default pipeline (CHECK, CHECK-AVX2, CHECK-AVX512) the copies are
; expected as GPR load/store pairs of 4+2+4+4+2 bytes, then 8+2+4+1+1 bytes.
; With --x86-disable-avoid-SFB (DISABLED) each copy stays a single movups
; load/store pair. Every prefix finishes with a retq check.
207 ; Function Attrs: nounwind uwtable
208 define dso_local void @test_overlap_3(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
209 ; CHECK-LABEL: test_overlap_3:
210 ; CHECK: # %bb.0: # %entry
211 ; CHECK-NEXT: movl $7, -10(%rdi)
212 ; CHECK-NEXT: movl -16(%rdi), %eax
213 ; CHECK-NEXT: movl %eax, (%rdi)
214 ; CHECK-NEXT: movzwl -12(%rdi), %eax
215 ; CHECK-NEXT: movw %ax, 4(%rdi)
216 ; CHECK-NEXT: movl -10(%rdi), %eax
217 ; CHECK-NEXT: movl %eax, 6(%rdi)
218 ; CHECK-NEXT: movl -6(%rdi), %eax
219 ; CHECK-NEXT: movl %eax, 10(%rdi)
220 ; CHECK-NEXT: movzwl -2(%rdi), %eax
221 ; CHECK-NEXT: movw %ax, 14(%rdi)
222 ; CHECK-NEXT: movslq %esi, %rax
223 ; CHECK-NEXT: movq %rax, -9(%rdi)
224 ; CHECK-NEXT: movq %rax, -16(%rdi)
225 ; CHECK-NEXT: movb $0, -1(%rdi)
226 ; CHECK-NEXT: movq -16(%rdi), %rax
227 ; CHECK-NEXT: movq %rax, 16(%rdi)
228 ; CHECK-NEXT: movzwl -8(%rdi), %eax
229 ; CHECK-NEXT: movw %ax, 24(%rdi)
230 ; CHECK-NEXT: movl -6(%rdi), %eax
231 ; CHECK-NEXT: movl %eax, 26(%rdi)
232 ; CHECK-NEXT: movb -2(%rdi), %al
233 ; CHECK-NEXT: movb %al, 30(%rdi)
234 ; CHECK-NEXT: movb -1(%rdi), %al
235 ; CHECK-NEXT: movb %al, 31(%rdi)
236 ; CHECK-NEXT: retq
238 ; DISABLED-LABEL: test_overlap_3:
239 ; DISABLED: # %bb.0: # %entry
240 ; DISABLED-NEXT: movl $7, -10(%rdi)
241 ; DISABLED-NEXT: movups -16(%rdi), %xmm0
242 ; DISABLED-NEXT: movups %xmm0, (%rdi)
243 ; DISABLED-NEXT: movslq %esi, %rax
244 ; DISABLED-NEXT: movq %rax, -9(%rdi)
245 ; DISABLED-NEXT: movq %rax, -16(%rdi)
246 ; DISABLED-NEXT: movb $0, -1(%rdi)
247 ; DISABLED-NEXT: movups -16(%rdi), %xmm0
248 ; DISABLED-NEXT: movups %xmm0, 16(%rdi)
249 ; DISABLED-NEXT: retq
251 ; CHECK-AVX2-LABEL: test_overlap_3:
252 ; CHECK-AVX2: # %bb.0: # %entry
253 ; CHECK-AVX2-NEXT: movl $7, -10(%rdi)
254 ; CHECK-AVX2-NEXT: movl -16(%rdi), %eax
255 ; CHECK-AVX2-NEXT: movl %eax, (%rdi)
256 ; CHECK-AVX2-NEXT: movzwl -12(%rdi), %eax
257 ; CHECK-AVX2-NEXT: movw %ax, 4(%rdi)
258 ; CHECK-AVX2-NEXT: movl -10(%rdi), %eax
259 ; CHECK-AVX2-NEXT: movl %eax, 6(%rdi)
260 ; CHECK-AVX2-NEXT: movl -6(%rdi), %eax
261 ; CHECK-AVX2-NEXT: movl %eax, 10(%rdi)
262 ; CHECK-AVX2-NEXT: movzwl -2(%rdi), %eax
263 ; CHECK-AVX2-NEXT: movw %ax, 14(%rdi)
264 ; CHECK-AVX2-NEXT: movslq %esi, %rax
265 ; CHECK-AVX2-NEXT: movq %rax, -9(%rdi)
266 ; CHECK-AVX2-NEXT: movq %rax, -16(%rdi)
267 ; CHECK-AVX2-NEXT: movb $0, -1(%rdi)
268 ; CHECK-AVX2-NEXT: movq -16(%rdi), %rax
269 ; CHECK-AVX2-NEXT: movq %rax, 16(%rdi)
270 ; CHECK-AVX2-NEXT: movzwl -8(%rdi), %eax
271 ; CHECK-AVX2-NEXT: movw %ax, 24(%rdi)
272 ; CHECK-AVX2-NEXT: movl -6(%rdi), %eax
273 ; CHECK-AVX2-NEXT: movl %eax, 26(%rdi)
274 ; CHECK-AVX2-NEXT: movb -2(%rdi), %al
275 ; CHECK-AVX2-NEXT: movb %al, 30(%rdi)
276 ; CHECK-AVX2-NEXT: movb -1(%rdi), %al
277 ; CHECK-AVX2-NEXT: movb %al, 31(%rdi)
278 ; CHECK-AVX2-NEXT: retq
280 ; CHECK-AVX512-LABEL: test_overlap_3:
281 ; CHECK-AVX512: # %bb.0: # %entry
282 ; CHECK-AVX512-NEXT: movl $7, -10(%rdi)
283 ; CHECK-AVX512-NEXT: movl -16(%rdi), %eax
284 ; CHECK-AVX512-NEXT: movl %eax, (%rdi)
285 ; CHECK-AVX512-NEXT: movzwl -12(%rdi), %eax
286 ; CHECK-AVX512-NEXT: movw %ax, 4(%rdi)
287 ; CHECK-AVX512-NEXT: movl -10(%rdi), %eax
288 ; CHECK-AVX512-NEXT: movl %eax, 6(%rdi)
289 ; CHECK-AVX512-NEXT: movl -6(%rdi), %eax
290 ; CHECK-AVX512-NEXT: movl %eax, 10(%rdi)
291 ; CHECK-AVX512-NEXT: movzwl -2(%rdi), %eax
292 ; CHECK-AVX512-NEXT: movw %ax, 14(%rdi)
293 ; CHECK-AVX512-NEXT: movslq %esi, %rax
294 ; CHECK-AVX512-NEXT: movq %rax, -9(%rdi)
295 ; CHECK-AVX512-NEXT: movq %rax, -16(%rdi)
296 ; CHECK-AVX512-NEXT: movb $0, -1(%rdi)
297 ; CHECK-AVX512-NEXT: movq -16(%rdi), %rax
298 ; CHECK-AVX512-NEXT: movq %rax, 16(%rdi)
299 ; CHECK-AVX512-NEXT: movzwl -8(%rdi), %eax
300 ; CHECK-AVX512-NEXT: movw %ax, 24(%rdi)
301 ; CHECK-AVX512-NEXT: movl -6(%rdi), %eax
302 ; CHECK-AVX512-NEXT: movl %eax, 26(%rdi)
303 ; CHECK-AVX512-NEXT: movb -2(%rdi), %al
304 ; CHECK-AVX512-NEXT: movb %al, 30(%rdi)
305 ; CHECK-AVX512-NEXT: movb -1(%rdi), %al
306 ; CHECK-AVX512-NEXT: movb %al, 31(%rdi)
307 ; CHECK-AVX512-NEXT: retq
; The block must be named "entry": every check above matches "# %entry".
308 entry:
309 %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
310 %add.ptr1 = getelementptr inbounds i8, i8* %A, i64 -10
311 %0 = bitcast i8* %add.ptr1 to i32*
312 store i32 7, i32* %0, align 4
313 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
314 %conv = sext i32 %x to i64
315 %add.ptr2 = getelementptr inbounds i8, i8* %A, i64 -9
316 %1 = bitcast i8* %add.ptr2 to i64*
317 store i64 %conv, i64* %1, align 8
318 %2 = bitcast i8* %add.ptr to i64*
319 store i64 %conv, i64* %2, align 8
320 %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 -1
321 store i8 0, i8* %add.ptr5, align 1
322 %add.ptr6 = getelementptr inbounds i8, i8* %A, i64 16
323 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr6, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
324 ret void
325 }
; test_overlap_4: only the second 16-byte memcpy is preceded by stores.
;   - 16 bytes are copied from A-16 to A with no earlier store in this
;     function; every prefix expects a single 16-byte vector load/store pair
;     (movups, or vmovups for CHECK-AVX2 and CHECK-AVX512).
;   - sext(x) is stored as i64 at A-8, x as i32 at A-16 and i32 0 at A-11,
;     then 16 bytes are copied from A-16 to A+16.
; With the default pipeline (CHECK, CHECK-AVX2, CHECK-AVX512) the second copy
; is expected as GPR load/store pairs of 4+1+4+4+2+1 bytes. With
; --x86-disable-avoid-SFB (DISABLED) it stays a single movups load/store
; pair. Every prefix finishes with a retq check.
327 ; Function Attrs: nounwind uwtable
328 define dso_local void @test_overlap_4(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
329 ; CHECK-LABEL: test_overlap_4:
330 ; CHECK: # %bb.0: # %entry
331 ; CHECK-NEXT: movups -16(%rdi), %xmm0
332 ; CHECK-NEXT: movups %xmm0, (%rdi)
333 ; CHECK-NEXT: movslq %esi, %rax
334 ; CHECK-NEXT: movq %rax, -8(%rdi)
335 ; CHECK-NEXT: movl %eax, -16(%rdi)
336 ; CHECK-NEXT: movl $0, -11(%rdi)
337 ; CHECK-NEXT: movl -16(%rdi), %eax
338 ; CHECK-NEXT: movl %eax, 16(%rdi)
339 ; CHECK-NEXT: movb -12(%rdi), %al
340 ; CHECK-NEXT: movb %al, 20(%rdi)
341 ; CHECK-NEXT: movl -11(%rdi), %eax
342 ; CHECK-NEXT: movl %eax, 21(%rdi)
343 ; CHECK-NEXT: movl -7(%rdi), %eax
344 ; CHECK-NEXT: movl %eax, 25(%rdi)
345 ; CHECK-NEXT: movzwl -3(%rdi), %eax
346 ; CHECK-NEXT: movw %ax, 29(%rdi)
347 ; CHECK-NEXT: movb -1(%rdi), %al
348 ; CHECK-NEXT: movb %al, 31(%rdi)
349 ; CHECK-NEXT: retq
351 ; DISABLED-LABEL: test_overlap_4:
352 ; DISABLED: # %bb.0: # %entry
353 ; DISABLED-NEXT: movups -16(%rdi), %xmm0
354 ; DISABLED-NEXT: movups %xmm0, (%rdi)
355 ; DISABLED-NEXT: movslq %esi, %rax
356 ; DISABLED-NEXT: movq %rax, -8(%rdi)
357 ; DISABLED-NEXT: movl %eax, -16(%rdi)
358 ; DISABLED-NEXT: movl $0, -11(%rdi)
359 ; DISABLED-NEXT: movups -16(%rdi), %xmm0
360 ; DISABLED-NEXT: movups %xmm0, 16(%rdi)
361 ; DISABLED-NEXT: retq
363 ; CHECK-AVX2-LABEL: test_overlap_4:
364 ; CHECK-AVX2: # %bb.0: # %entry
365 ; CHECK-AVX2-NEXT: vmovups -16(%rdi), %xmm0
366 ; CHECK-AVX2-NEXT: vmovups %xmm0, (%rdi)
367 ; CHECK-AVX2-NEXT: movslq %esi, %rax
368 ; CHECK-AVX2-NEXT: movq %rax, -8(%rdi)
369 ; CHECK-AVX2-NEXT: movl %eax, -16(%rdi)
370 ; CHECK-AVX2-NEXT: movl $0, -11(%rdi)
371 ; CHECK-AVX2-NEXT: movl -16(%rdi), %eax
372 ; CHECK-AVX2-NEXT: movl %eax, 16(%rdi)
373 ; CHECK-AVX2-NEXT: movb -12(%rdi), %al
374 ; CHECK-AVX2-NEXT: movb %al, 20(%rdi)
375 ; CHECK-AVX2-NEXT: movl -11(%rdi), %eax
376 ; CHECK-AVX2-NEXT: movl %eax, 21(%rdi)
377 ; CHECK-AVX2-NEXT: movl -7(%rdi), %eax
378 ; CHECK-AVX2-NEXT: movl %eax, 25(%rdi)
379 ; CHECK-AVX2-NEXT: movzwl -3(%rdi), %eax
380 ; CHECK-AVX2-NEXT: movw %ax, 29(%rdi)
381 ; CHECK-AVX2-NEXT: movb -1(%rdi), %al
382 ; CHECK-AVX2-NEXT: movb %al, 31(%rdi)
383 ; CHECK-AVX2-NEXT: retq
385 ; CHECK-AVX512-LABEL: test_overlap_4:
386 ; CHECK-AVX512: # %bb.0: # %entry
387 ; CHECK-AVX512-NEXT: vmovups -16(%rdi), %xmm0
388 ; CHECK-AVX512-NEXT: vmovups %xmm0, (%rdi)
389 ; CHECK-AVX512-NEXT: movslq %esi, %rax
390 ; CHECK-AVX512-NEXT: movq %rax, -8(%rdi)
391 ; CHECK-AVX512-NEXT: movl %eax, -16(%rdi)
392 ; CHECK-AVX512-NEXT: movl $0, -11(%rdi)
393 ; CHECK-AVX512-NEXT: movl -16(%rdi), %eax
394 ; CHECK-AVX512-NEXT: movl %eax, 16(%rdi)
395 ; CHECK-AVX512-NEXT: movb -12(%rdi), %al
396 ; CHECK-AVX512-NEXT: movb %al, 20(%rdi)
397 ; CHECK-AVX512-NEXT: movl -11(%rdi), %eax
398 ; CHECK-AVX512-NEXT: movl %eax, 21(%rdi)
399 ; CHECK-AVX512-NEXT: movl -7(%rdi), %eax
400 ; CHECK-AVX512-NEXT: movl %eax, 25(%rdi)
401 ; CHECK-AVX512-NEXT: movzwl -3(%rdi), %eax
402 ; CHECK-AVX512-NEXT: movw %ax, 29(%rdi)
403 ; CHECK-AVX512-NEXT: movb -1(%rdi), %al
404 ; CHECK-AVX512-NEXT: movb %al, 31(%rdi)
405 ; CHECK-AVX512-NEXT: retq
; The block must be named "entry": every check above matches "# %entry".
406 entry:
407 %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
408 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
409 %conv = sext i32 %x to i64
410 %add.ptr1 = getelementptr inbounds i8, i8* %A, i64 -8
411 %0 = bitcast i8* %add.ptr1 to i64*
412 store i64 %conv, i64* %0, align 8
413 %1 = bitcast i8* %add.ptr to i32*
414 store i32 %x, i32* %1, align 4
415 %add.ptr3 = getelementptr inbounds i8, i8* %A, i64 -11
416 %2 = bitcast i8* %add.ptr3 to i32*
417 store i32 0, i32* %2, align 4
418 %add.ptr4 = getelementptr inbounds i8, i8* %A, i64 16
419 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr4, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
420 ret void
421 }
; test_overlap_5: only the second 16-byte memcpy is preceded by stores.
;   - 16 bytes are copied from A-16 to A with no earlier store in this
;     function; every prefix expects a single 16-byte vector load/store pair
;     (movups, or vmovups for CHECK-AVX2 and CHECK-AVX512).
;   - sext(x) is stored as i64 at A-16, trunc(x) as i8 at A-14 and i8 0 at
;     A-11, then 16 bytes are copied from A-16 to A+16.
; With the default pipeline (CHECK, CHECK-AVX2, CHECK-AVX512) the second copy
; is expected as GPR load/store pairs of 2+1+2+1+8+2 bytes. With
; --x86-disable-avoid-SFB (DISABLED) it stays a single movups load/store
; pair. Every prefix finishes with a retq check.
423 ; Function Attrs: nounwind uwtable
424 define dso_local void @test_overlap_5(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
425 ; CHECK-LABEL: test_overlap_5:
426 ; CHECK: # %bb.0: # %entry
427 ; CHECK-NEXT: movups -16(%rdi), %xmm0
428 ; CHECK-NEXT: movups %xmm0, (%rdi)
429 ; CHECK-NEXT: movslq %esi, %rax
430 ; CHECK-NEXT: movq %rax, -16(%rdi)
431 ; CHECK-NEXT: movb %al, -14(%rdi)
432 ; CHECK-NEXT: movb $0, -11(%rdi)
433 ; CHECK-NEXT: movzwl -16(%rdi), %eax
434 ; CHECK-NEXT: movw %ax, 16(%rdi)
435 ; CHECK-NEXT: movb -14(%rdi), %al
436 ; CHECK-NEXT: movb %al, 18(%rdi)
437 ; CHECK-NEXT: movzwl -13(%rdi), %eax
438 ; CHECK-NEXT: movw %ax, 19(%rdi)
439 ; CHECK-NEXT: movb -11(%rdi), %al
440 ; CHECK-NEXT: movb %al, 21(%rdi)
441 ; CHECK-NEXT: movq -10(%rdi), %rax
442 ; CHECK-NEXT: movq %rax, 22(%rdi)
443 ; CHECK-NEXT: movzwl -2(%rdi), %eax
444 ; CHECK-NEXT: movw %ax, 30(%rdi)
445 ; CHECK-NEXT: retq
447 ; DISABLED-LABEL: test_overlap_5:
448 ; DISABLED: # %bb.0: # %entry
449 ; DISABLED-NEXT: movups -16(%rdi), %xmm0
450 ; DISABLED-NEXT: movups %xmm0, (%rdi)
451 ; DISABLED-NEXT: movslq %esi, %rax
452 ; DISABLED-NEXT: movq %rax, -16(%rdi)
453 ; DISABLED-NEXT: movb %al, -14(%rdi)
454 ; DISABLED-NEXT: movb $0, -11(%rdi)
455 ; DISABLED-NEXT: movups -16(%rdi), %xmm0
456 ; DISABLED-NEXT: movups %xmm0, 16(%rdi)
457 ; DISABLED-NEXT: retq
459 ; CHECK-AVX2-LABEL: test_overlap_5:
460 ; CHECK-AVX2: # %bb.0: # %entry
461 ; CHECK-AVX2-NEXT: vmovups -16(%rdi), %xmm0
462 ; CHECK-AVX2-NEXT: vmovups %xmm0, (%rdi)
463 ; CHECK-AVX2-NEXT: movslq %esi, %rax
464 ; CHECK-AVX2-NEXT: movq %rax, -16(%rdi)
465 ; CHECK-AVX2-NEXT: movb %al, -14(%rdi)
466 ; CHECK-AVX2-NEXT: movb $0, -11(%rdi)
467 ; CHECK-AVX2-NEXT: movzwl -16(%rdi), %eax
468 ; CHECK-AVX2-NEXT: movw %ax, 16(%rdi)
469 ; CHECK-AVX2-NEXT: movb -14(%rdi), %al
470 ; CHECK-AVX2-NEXT: movb %al, 18(%rdi)
471 ; CHECK-AVX2-NEXT: movzwl -13(%rdi), %eax
472 ; CHECK-AVX2-NEXT: movw %ax, 19(%rdi)
473 ; CHECK-AVX2-NEXT: movb -11(%rdi), %al
474 ; CHECK-AVX2-NEXT: movb %al, 21(%rdi)
475 ; CHECK-AVX2-NEXT: movq -10(%rdi), %rax
476 ; CHECK-AVX2-NEXT: movq %rax, 22(%rdi)
477 ; CHECK-AVX2-NEXT: movzwl -2(%rdi), %eax
478 ; CHECK-AVX2-NEXT: movw %ax, 30(%rdi)
479 ; CHECK-AVX2-NEXT: retq
481 ; CHECK-AVX512-LABEL: test_overlap_5:
482 ; CHECK-AVX512: # %bb.0: # %entry
483 ; CHECK-AVX512-NEXT: vmovups -16(%rdi), %xmm0
484 ; CHECK-AVX512-NEXT: vmovups %xmm0, (%rdi)
485 ; CHECK-AVX512-NEXT: movslq %esi, %rax
486 ; CHECK-AVX512-NEXT: movq %rax, -16(%rdi)
487 ; CHECK-AVX512-NEXT: movb %al, -14(%rdi)
488 ; CHECK-AVX512-NEXT: movb $0, -11(%rdi)
489 ; CHECK-AVX512-NEXT: movzwl -16(%rdi), %eax
490 ; CHECK-AVX512-NEXT: movw %ax, 16(%rdi)
491 ; CHECK-AVX512-NEXT: movb -14(%rdi), %al
492 ; CHECK-AVX512-NEXT: movb %al, 18(%rdi)
493 ; CHECK-AVX512-NEXT: movzwl -13(%rdi), %eax
494 ; CHECK-AVX512-NEXT: movw %ax, 19(%rdi)
495 ; CHECK-AVX512-NEXT: movb -11(%rdi), %al
496 ; CHECK-AVX512-NEXT: movb %al, 21(%rdi)
497 ; CHECK-AVX512-NEXT: movq -10(%rdi), %rax
498 ; CHECK-AVX512-NEXT: movq %rax, 22(%rdi)
499 ; CHECK-AVX512-NEXT: movzwl -2(%rdi), %eax
500 ; CHECK-AVX512-NEXT: movw %ax, 30(%rdi)
501 ; CHECK-AVX512-NEXT: retq
; The block must be named "entry": every check above matches "# %entry".
502 entry:
503 %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
504 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
505 %conv = sext i32 %x to i64
506 %0 = bitcast i8* %add.ptr to i64*
507 store i64 %conv, i64* %0, align 8
508 %conv2 = trunc i32 %x to i8
509 %add.ptr3 = getelementptr inbounds i8, i8* %A, i64 -14
510 store i8 %conv2, i8* %add.ptr3, align 1
511 %add.ptr4 = getelementptr inbounds i8, i8* %A, i64 -11
512 store i8 0, i8* %add.ptr4, align 1
513 %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 16
514 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr5, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
515 ret void
516 }
; Attribute groups referenced above: #0 is attached to every test_overlap_*
; definition (it pins target-cpu to x86-64 with the fxsr/mmx/sse/sse2/x87
; features), and #1 is attached to the llvm.memcpy declaration.
518 attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
519 attributes #1 = { argmemonly nounwind }