1 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mcpu=cyclone -mattr=+slow-misaligned-128store | FileCheck %s
; Packed struct (the <{ }> form has no padding between fields), so the two
; i64 fields start at byte offsets 4 and 12 rather than at 8-byte boundaries.
2 %struct.X = type <{ i32, i64, i64 }>
; foo1: truncates the i64 %val to i32 and stores it one i32 element before %p.
; The expected instruction is a stur of w1 at byte offset -4 from x0.
4 define void @foo1(i32* %p, i64 %val) nounwind {
6 ; CHECK: stur w1, [x0, #-4]
8 %tmp1 = trunc i64 %val to i32
9 %ptr = getelementptr inbounds i32, i32* %p, i64 -1
10 store i32 %tmp1, i32* %ptr, align 4
; foo2: truncates the i64 %val to i16 and stores it one i16 element before %p.
; The expected instruction is a sturh of w1 at byte offset -2 from x0.
13 define void @foo2(i16* %p, i64 %val) nounwind {
15 ; CHECK: sturh w1, [x0, #-2]
17 %tmp1 = trunc i64 %val to i16
18 %ptr = getelementptr inbounds i16, i16* %p, i64 -1
19 store i16 %tmp1, i16* %ptr, align 2
; foo3: truncates the i64 %val to i8 and stores it one byte before %p.
; The expected instruction is a sturb of w1 at byte offset -1 from x0.
22 define void @foo3(i8* %p, i64 %val) nounwind {
24 ; CHECK: sturb w1, [x0, #-1]
26 %tmp1 = trunc i64 %val to i8
27 %ptr = getelementptr inbounds i8, i8* %p, i64 -1
28 store i8 %tmp1, i8* %ptr, align 1
; foo4: same store as @foo2, but %val is an i32 and the getelementptr index is
; an i32 -1 instead of an i64. The expected instruction is still a sturh of w1
; at byte offset -2 from x0.
31 define void @foo4(i16* %p, i32 %val) nounwind {
33 ; CHECK: sturh w1, [x0, #-2]
35 %tmp1 = trunc i32 %val to i16
36 %ptr = getelementptr inbounds i16, i16* %p, i32 -1
37 store i16 %tmp1, i16* %ptr, align 2
; foo5: same store as @foo3, but %val is an i32 and the getelementptr index is
; an i32 -1 instead of an i64. The expected instruction is still a sturb of w1
; at byte offset -1 from x0.
40 define void @foo5(i8* %p, i32 %val) nounwind {
42 ; CHECK: sturb w1, [x0, #-1]
44 %tmp1 = trunc i32 %val to i8
45 %ptr = getelementptr inbounds i8, i8* %p, i32 -1
46 store i8 %tmp1, i8* %ptr, align 1
; foo: zeroes 16 bytes starting at field 1 of the packed %struct.X, i.e. the
; two i64 fields. Because the struct is packed, those fields sit at byte
; offsets 4 and 12, and the expected output is two back-to-back stur stores of
; xzr at offsets 12 and 4 from x0.
50 define void @foo(%struct.X* nocapture %p) nounwind optsize ssp {
53 ; CHECK: stur xzr, [x0, #12]
54 ; CHECK-NEXT: stur xzr, [x0, #4]
56 %B = getelementptr inbounds %struct.X, %struct.X* %p, i64 0, i32 1
57 %val = bitcast i64* %B to i8*
58 call void @llvm.memset.p0i8.i64(i8* %val, i8 0, i64 16, i1 false)
; Declaration of the memset intrinsic called by @foo. Operands, in order:
; destination pointer, fill byte, length in bytes, is-volatile flag.
62 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
64 ; Unaligned 16-byte stores are split into two 8-byte stores for performance.
; unaligned: stores a <4 x i32> with only 4-byte alignment. The expected
; sequence writes the low 64 bits with a str of a d register at [x0], uses
; ext with #8 to move the upper 64 bits of the vector into the low half of a
; second register, and writes that with a str of a d register at [x0, #8].
67 ; CHECK-LABEL: unaligned:
69 ; CHECK: str d[[REG:[0-9]+]], [x0]
70 ; CHECK: ext.16b v[[REG2:[0-9]+]], v[[REG]], v[[REG]], #8
71 ; CHECK: str d[[REG2]], [x0, #8]
72 define void @unaligned(<4 x i32>* %p, <4 x i32> %v) nounwind {
73 store <4 x i32> %v, <4 x i32>* %p, align 4
; aligned: the same <4 x i32> store as @unaligned, but with no explicit align
; on the store, so the target's ABI alignment for the vector type applies.
77 ; CHECK-LABEL: aligned:
79 define void @aligned(<4 x i32>* %p, <4 x i32> %v) nounwind {
80 store <4 x i32> %v, <4 x i32>* %p
84 ; Don't split one- and two-byte-aligned stores.
; twobytealign: stores a <4 x i32> with 2-byte alignment (align 2).
87 ; CHECK-LABEL: twobytealign:
89 define void @twobytealign(<4 x i32>* %p, <4 x i32> %v) nounwind {
90 store <4 x i32> %v, <4 x i32>* %p, align 2
; onebytealign: stores a <4 x i32> with 1-byte alignment (align 1).
93 ; CHECK-LABEL: onebytealign:
95 define void @onebytealign(<4 x i32>* %p, <4 x i32> %v) nounwind {
96 store <4 x i32> %v, <4 x i32>* %p, align 1