1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
; Element-type-specific test structs. Each one is a pair of equally sized
; arrays; with these element counts every array is 16 bytes (one 128-bit
; vector), so the second member sits 16 bytes after the first and the whole
; struct is 32 bytes.
4 %struct.s_int8_t = type { [16 x i8], [16 x i8] }
5 %struct.s_int16_t = type { [8 x i16], [8 x i16] }
6 %struct.s_int32_t = type { [4 x i32], [4 x i32] }
7 %struct.s_float16_t = type { [8 x half], [8 x half] }
8 %struct.s_float32_t = type { [4 x float], [4 x float] }
; fwd_int8_t: copy the first <16 x i8> of %struct.s_int8_t at %v into the
; struct's second array. The GEP (indices 0, 1, 0) addresses element 0 of
; member 1, i.e. %v + 16 bytes. The assertions expect a byte-element vector
; load from [r0] followed by a store using the immediate offset #16.
; Unlike the wider-element variants, %v here carries only `noalias`
; (no `nocapture`).
10 define hidden void @fwd_int8_t(ptr noalias %v) local_unnamed_addr #0 {
11 ; CHECK-LABEL: fwd_int8_t:
12 ; CHECK: @ %bb.0: @ %entry
13 ; CHECK-NEXT: vldrb.u8 q0, [r0]
14 ; CHECK-NEXT: vstrb.8 q0, [r0, #16]
17 %arrayidx3 = getelementptr inbounds %struct.s_int8_t, ptr %v, i32 0, i32 1, i32 0
18 %0 = load <16 x i8>, ptr %v, align 1
19 store <16 x i8> %0, ptr %arrayidx3, align 1
; fwd_int16_t: copy the first <8 x i16> of %struct.s_int16_t at %v into the
; struct's second array. The GEP (indices 0, 1, 0) addresses element 0 of
; member 1, i.e. %v + 16 bytes. The assertions expect a halfword-element
; vector load from [r0] followed by a store using the immediate offset #16.
23 define hidden void @fwd_int16_t(ptr noalias nocapture %v) local_unnamed_addr #0 {
24 ; CHECK-LABEL: fwd_int16_t:
25 ; CHECK: @ %bb.0: @ %entry
26 ; CHECK-NEXT: vldrh.u16 q0, [r0]
27 ; CHECK-NEXT: vstrh.16 q0, [r0, #16]
30 %arrayidx3 = getelementptr inbounds %struct.s_int16_t, ptr %v, i32 0, i32 1, i32 0
31 %0 = load <8 x i16>, ptr %v, align 2
32 store <8 x i16> %0, ptr %arrayidx3, align 2
; fwd_int32_t: copy the first <4 x i32> of %struct.s_int32_t at %v into the
; struct's second array. The GEP (indices 0, 1, 0) addresses element 0 of
; member 1, i.e. %v + 16 bytes. The assertions expect a word-element vector
; load from [r0] followed by a store using the immediate offset #16.
36 define hidden void @fwd_int32_t(ptr noalias nocapture %v) local_unnamed_addr #0 {
37 ; CHECK-LABEL: fwd_int32_t:
38 ; CHECK: @ %bb.0: @ %entry
39 ; CHECK-NEXT: vldrw.u32 q0, [r0]
40 ; CHECK-NEXT: vstrw.32 q0, [r0, #16]
43 %arrayidx3 = getelementptr inbounds %struct.s_int32_t, ptr %v, i32 0, i32 1, i32 0
44 %0 = load <4 x i32>, ptr %v, align 4
45 store <4 x i32> %0, ptr %arrayidx3, align 4
; fwd_float16_t: copy the first <8 x half> of %struct.s_float16_t at %v into
; the struct's second array. The GEP (indices 0, 1, 0) addresses element 0 of
; member 1, i.e. %v + 16 bytes.
; NOTE(review): the recorded output differs in shape from the integer
; variants: the load is written as `[r0], #16` (offset applied to r0 after
; the access) and the store then uses plain `[r0]`, instead of a plain load
; plus a `#16` store offset. The assertions are tool-generated, so regenerate
; them rather than hand-editing if this form changes.
49 define hidden void @fwd_float16_t(ptr noalias nocapture %v) local_unnamed_addr #0 {
50 ; CHECK-LABEL: fwd_float16_t:
51 ; CHECK: @ %bb.0: @ %entry
52 ; CHECK-NEXT: vldrh.u16 q0, [r0], #16
53 ; CHECK-NEXT: vstrh.16 q0, [r0]
56 %arrayidx3 = getelementptr inbounds %struct.s_float16_t, ptr %v, i32 0, i32 1, i32 0
57 %0 = load <8 x half>, ptr %v, align 2
58 store <8 x half> %0, ptr %arrayidx3, align 2
; fwd_float32_t: copy the first 16 bytes of %struct.s_float32_t at %v into the
; struct's second array. The GEP here stops at member 1 (indices 0, 1) rather
; than indexing its element 0; the resulting address is the same, %v + 16.
; The assertions expect a word-element vector load from [r0] followed by a
; store using the immediate offset #16.
; NOTE(review): the load/store use <4 x i32>, not <4 x float>, even though the
; struct holds floats - presumably intentional (the copy is type-agnostic);
; confirm before changing, since the assertions are generated from this IR.
62 define hidden void @fwd_float32_t(ptr noalias nocapture %v) local_unnamed_addr #0 {
63 ; CHECK-LABEL: fwd_float32_t:
64 ; CHECK: @ %bb.0: @ %entry
65 ; CHECK-NEXT: vldrw.u32 q0, [r0]
66 ; CHECK-NEXT: vstrw.32 q0, [r0, #16]
69 %d = getelementptr inbounds %struct.s_float32_t, ptr %v, i32 0, i32 1
70 %0 = load <4 x i32>, ptr %v, align 4
71 store <4 x i32> %0, ptr %d, align 4
; bwd_int8_t: load <16 x i8> from %v and store it 16 bytes *before* %v.
; The GEP (indices -1, 1, 0) steps back one whole 32-byte %struct.s_int8_t and
; then forward to element 0 of member 1 (+16), for a net offset of -16 bytes.
; The assertions expect a byte-element vector load from [r0] followed by a
; store using the negative immediate offset #-16. The basic-block comment in
; the expected output is `for.end` rather than `entry`.
; Unlike the wider-element variants, %v here carries only `noalias`
; (no `nocapture`).
75 define hidden void @bwd_int8_t(ptr noalias %v) local_unnamed_addr #0 {
76 ; CHECK-LABEL: bwd_int8_t:
77 ; CHECK: @ %bb.0: @ %for.end
78 ; CHECK-NEXT: vldrb.u8 q0, [r0]
79 ; CHECK-NEXT: vstrb.8 q0, [r0, #-16]
82 %0 = load <16 x i8>, ptr %v, align 1
83 %arrayidx3 = getelementptr inbounds %struct.s_int8_t, ptr %v, i32 -1, i32 1, i32 0
84 store <16 x i8> %0, ptr %arrayidx3, align 1
; bwd_int16_t: load <8 x i16> from %v and store it 16 bytes *before* %v.
; The GEP (indices -1, 1, 0) steps back one whole 32-byte %struct.s_int16_t
; and then forward to element 0 of member 1 (+16), for a net offset of -16
; bytes. The assertions expect a halfword-element vector load from [r0]
; followed by a store using the negative immediate offset #-16.
88 define hidden void @bwd_int16_t(ptr noalias nocapture %v) local_unnamed_addr #0 {
89 ; CHECK-LABEL: bwd_int16_t:
90 ; CHECK: @ %bb.0: @ %for.end
91 ; CHECK-NEXT: vldrh.u16 q0, [r0]
92 ; CHECK-NEXT: vstrh.16 q0, [r0, #-16]
95 %0 = load <8 x i16>, ptr %v, align 2
96 %arrayidx3 = getelementptr inbounds %struct.s_int16_t, ptr %v, i32 -1, i32 1, i32 0
97 store <8 x i16> %0, ptr %arrayidx3, align 2
; bwd_int32_t: load <4 x i32> from %v and store it 16 bytes *before* %v.
; The GEP (indices -1, 1, 0) steps back one whole 32-byte %struct.s_int32_t
; and then forward to element 0 of member 1 (+16), for a net offset of -16
; bytes. The assertions expect a word-element vector load from [r0] followed
; by a store using the negative immediate offset #-16.
101 define hidden void @bwd_int32_t(ptr noalias nocapture %v) local_unnamed_addr #0 {
102 ; CHECK-LABEL: bwd_int32_t:
103 ; CHECK: @ %bb.0: @ %for.end
104 ; CHECK-NEXT: vldrw.u32 q0, [r0]
105 ; CHECK-NEXT: vstrw.32 q0, [r0, #-16]
108 %0 = load <4 x i32>, ptr %v, align 4
109 %arrayidx3 = getelementptr inbounds %struct.s_int32_t, ptr %v, i32 -1, i32 1, i32 0
110 store <4 x i32> %0, ptr %arrayidx3, align 4
; bwd_float16_t: load <8 x half> from %v and store it 16 bytes *before* %v.
; The GEP (indices -1, 1, 0) steps back one whole 32-byte %struct.s_float16_t
; and then forward to element 0 of member 1 (+16), for a net offset of -16
; bytes.
; NOTE(review): as in the forward half-precision case, the recorded output
; differs in shape from the integer variants: the load is written as
; `[r0], #-16` (offset applied to r0 after the access) and the store then uses
; plain `[r0]`. The assertions are tool-generated, so regenerate them rather
; than hand-editing if this form changes.
114 define hidden void @bwd_float16_t(ptr noalias nocapture %v) local_unnamed_addr #0 {
115 ; CHECK-LABEL: bwd_float16_t:
116 ; CHECK: @ %bb.0: @ %for.end
117 ; CHECK-NEXT: vldrh.u16 q0, [r0], #-16
118 ; CHECK-NEXT: vstrh.16 q0, [r0]
121 %0 = load <8 x half>, ptr %v, align 2
122 %arrayidx3 = getelementptr inbounds %struct.s_float16_t, ptr %v, i32 -1, i32 1, i32 0
123 store <8 x half> %0, ptr %arrayidx3, align 2
; bwd_float32_t: load 16 bytes from %v and store them 16 bytes *before* %v.
; The GEP (indices -1, 1) steps back one whole 32-byte %struct.s_float32_t and
; then forward to member 1 (+16), for a net offset of -16 bytes; unlike the
; integer variants it does not index element 0 of the array, which yields the
; same address. The assertions expect a word-element vector load from [r0]
; followed by a store using the negative immediate offset #-16.
; NOTE(review): the load/store use <4 x i32>, not <4 x float>, even though the
; struct holds floats - presumably intentional (the copy is type-agnostic);
; confirm before changing, since the assertions are generated from this IR.
127 define hidden void @bwd_float32_t(ptr noalias nocapture %v) local_unnamed_addr #0 {
128 ; CHECK-LABEL: bwd_float32_t:
129 ; CHECK: @ %bb.0: @ %for.end
130 ; CHECK-NEXT: vldrw.u32 q0, [r0]
131 ; CHECK-NEXT: vstrw.32 q0, [r0, #-16]
134 %0 = load <4 x i32>, ptr %v, align 4
135 %d = getelementptr inbounds %struct.s_float32_t, ptr %v, i32 -1, i32 1
136 store <4 x i32> %0, ptr %d, align 4