1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
3 ; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
; Truncating store, i32 lanes -> i8 lanes.
; Loads <4 x i32> from %src (align 4), truncates to <4 x i8> and stores the
; result to %dest (align 1).
; Expected codegen: one vldrw.u32 load followed directly by one vstrb.32 store.
; The assertions use only the prefix shared by both RUN lines, so the same
; output is expected for the thumb and thumbeb triples.
; %n is not referenced in the lines visible here.
5 define void @foo_int8_int32(<4 x i8>* %dest, <4 x i32>* readonly %src, i32 %n) {
6 ; CHECK-LABEL: foo_int8_int32:
7 ; CHECK: @ %bb.0: @ %entry
8 ; CHECK-NEXT: vldrw.u32 q0, [r1]
9 ; CHECK-NEXT: vstrb.32 q0, [r0]
12 %wide.load = load <4 x i32>, <4 x i32>* %src, align 4
13 %0 = trunc <4 x i32> %wide.load to <4 x i8>
14 store <4 x i8> %0, <4 x i8>* %dest, align 1
; Truncating store, i32 lanes -> i16 lanes.
; Loads <4 x i32> from %src (align 4), truncates to <4 x i16> and stores the
; result to %dest (align 2).
; Expected codegen: one vldrw.u32 load followed directly by one vstrh.32 store.
; %n is not referenced in the lines visible here.
18 define void @foo_int16_int32(<4 x i16>* %dest, <4 x i32>* readonly %src, i32 %n) {
19 ; CHECK-LABEL: foo_int16_int32:
20 ; CHECK: @ %bb.0: @ %entry
21 ; CHECK-NEXT: vldrw.u32 q0, [r1]
22 ; CHECK-NEXT: vstrh.32 q0, [r0]
25 %wide.load = load <4 x i32>, <4 x i32>* %src, align 4
26 %0 = trunc <4 x i32> %wide.load to <4 x i16>
27 store <4 x i16> %0, <4 x i16>* %dest, align 2
; Truncating store, i16 lanes -> i8 lanes.
; Loads <8 x i16> from %src (align 2), truncates to <8 x i8> and stores the
; result to %dest (align 1).
; Expected codegen: one vldrh.u16 load followed directly by one vstrb.16 store.
; %n is not referenced in the lines visible here.
31 define void @foo_int8_int16(<8 x i8>* %dest, <8 x i16>* readonly %src, i32 %n) {
32 ; CHECK-LABEL: foo_int8_int16:
33 ; CHECK: @ %bb.0: @ %entry
34 ; CHECK-NEXT: vldrh.u16 q0, [r1]
35 ; CHECK-NEXT: vstrb.16 q0, [r0]
38 %wide.load = load <8 x i16>, <8 x i16>* %src, align 2
39 %0 = trunc <8 x i16> %wide.load to <8 x i8>
40 store <8 x i8> %0, <8 x i8>* %dest, align 1
; Sign-extending load, i8 lanes -> i32 lanes.
; Loads <4 x i8> from %src (align 1), sign-extends to <4 x i32> and stores the
; result to %dest (align 4).
; Expected codegen: the sext is folded into the load (vldrb.s32), followed by
; a plain vstrw.32 store.
; %n is not referenced in the lines visible here.
44 define void @foo_int32_int8(<4 x i32>* %dest, <4 x i8>* readonly %src, i32 %n) {
45 ; CHECK-LABEL: foo_int32_int8:
46 ; CHECK: @ %bb.0: @ %entry
47 ; CHECK-NEXT: vldrb.s32 q0, [r1]
48 ; CHECK-NEXT: vstrw.32 q0, [r0]
51 %wide.load = load <4 x i8>, <4 x i8>* %src, align 1
52 %0 = sext <4 x i8> %wide.load to <4 x i32>
53 store <4 x i32> %0, <4 x i32>* %dest, align 4
; Sign-extending load, i8 lanes -> i16 lanes.
; Loads <8 x i8> from %src (align 1), sign-extends to <8 x i16> and stores the
; result to %dest (align 2).
; Expected codegen: the sext is folded into the load (vldrb.s16), followed by
; a plain vstrh.16 store.
; %n is not referenced in the lines visible here.
57 define void @foo_int16_int8(<8 x i16>* %dest, <8 x i8>* readonly %src, i32 %n) {
58 ; CHECK-LABEL: foo_int16_int8:
59 ; CHECK: @ %bb.0: @ %entry
60 ; CHECK-NEXT: vldrb.s16 q0, [r1]
61 ; CHECK-NEXT: vstrh.16 q0, [r0]
64 %wide.load = load <8 x i8>, <8 x i8>* %src, align 1
65 %0 = sext <8 x i8> %wide.load to <8 x i16>
66 store <8 x i16> %0, <8 x i16>* %dest, align 2
; Sign-extending load, i16 lanes -> i32 lanes.
; Loads <4 x i16> from %src (align 2), sign-extends to <4 x i32> and stores the
; result to %dest (align 4).
; Expected codegen: the sext is folded into the load (vldrh.s32), followed by
; a plain vstrw.32 store.
; %n is not referenced in the lines visible here.
70 define void @foo_int32_int16(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
71 ; CHECK-LABEL: foo_int32_int16:
72 ; CHECK: @ %bb.0: @ %entry
73 ; CHECK-NEXT: vldrh.s32 q0, [r1]
74 ; CHECK-NEXT: vstrw.32 q0, [r0]
77 %wide.load = load <4 x i16>, <4 x i16>* %src, align 2
78 %0 = sext <4 x i16> %wide.load to <4 x i32>
79 store <4 x i32> %0, <4 x i32>* %dest, align 4
; Zero-extending load, i8 lanes -> i32 lanes.
; Loads <4 x i8> from %src (align 1), zero-extends to <4 x i32> and stores the
; result to %dest (align 4).
; Expected codegen: the zext is folded into the load (vldrb.u32), followed by
; a plain vstrw.32 store.
; %n is not referenced in the lines visible here.
83 define void @foo_uint32_uint8(<4 x i32>* %dest, <4 x i8>* readonly %src, i32 %n) {
84 ; CHECK-LABEL: foo_uint32_uint8:
85 ; CHECK: @ %bb.0: @ %entry
86 ; CHECK-NEXT: vldrb.u32 q0, [r1]
87 ; CHECK-NEXT: vstrw.32 q0, [r0]
90 %wide.load = load <4 x i8>, <4 x i8>* %src, align 1
91 %0 = zext <4 x i8> %wide.load to <4 x i32>
92 store <4 x i32> %0, <4 x i32>* %dest, align 4
; Zero-extending load, i8 lanes -> i16 lanes.
; Loads <8 x i8> from %src (align 1), zero-extends to <8 x i16> and stores the
; result to %dest (align 2).
; Expected codegen: the zext is folded into the load (vldrb.u16), followed by
; a plain vstrh.16 store.
; %n is not referenced in the lines visible here.
96 define void @foo_uint16_uint8(<8 x i16>* %dest, <8 x i8>* readonly %src, i32 %n) {
97 ; CHECK-LABEL: foo_uint16_uint8:
98 ; CHECK: @ %bb.0: @ %entry
99 ; CHECK-NEXT: vldrb.u16 q0, [r1]
100 ; CHECK-NEXT: vstrh.16 q0, [r0]
103 %wide.load = load <8 x i8>, <8 x i8>* %src, align 1
104 %0 = zext <8 x i8> %wide.load to <8 x i16>
105 store <8 x i16> %0, <8 x i16>* %dest, align 2
; Zero-extending load, i16 lanes -> i32 lanes.
; Loads <4 x i16> from %src (align 2), zero-extends to <4 x i32> and stores the
; result to %dest (align 4).
; Expected codegen: the zext is folded into the load (vldrh.u32), followed by
; a plain vstrw.32 store.
; %n is not referenced in the lines visible here.
109 define void @foo_uint32_uint16(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
110 ; CHECK-LABEL: foo_uint32_uint16:
111 ; CHECK: @ %bb.0: @ %entry
112 ; CHECK-NEXT: vldrh.u32 q0, [r1]
113 ; CHECK-NEXT: vstrw.32 q0, [r0]
116 %wide.load = load <4 x i16>, <4 x i16>* %src, align 2
117 %0 = zext <4 x i16> %wide.load to <4 x i32>
118 store <4 x i32> %0, <4 x i32>* %dest, align 4
; Under-aligned truncating store, i32 lanes -> i16 lanes.
; Same IR as the align-2 variant of this pattern except that the final store
; is only 1-byte aligned, i.e. below the 2-byte size of the stored i16 lanes.
; Expected codegen goes through the stack instead of storing to %dest directly:
;   - reserve 8 bytes of stack (.pad / sub sp)
;   - vldrw.u32 the source vector
;   - vstrh.32 the narrowed lanes into the stack slot (r1 = sp)
;   - ldrd the two words back and write them to [r0] and [r0, #4] with str
;   - release the stack slot (add sp)
; NOTE(review): presumably vstrh.32 is avoided on %dest because it needs at
; least halfword alignment - confirm against the MVE lowering code.
; %n is not referenced in the lines visible here.
125 define void @foo_int16_int32_align1(<4 x i16>* %dest, <4 x i32>* readonly %src, i32 %n) {
126 ; CHECK-LABEL: foo_int16_int32_align1:
127 ; CHECK: @ %bb.0: @ %entry
128 ; CHECK-NEXT: .pad #8
129 ; CHECK-NEXT: sub sp, #8
130 ; CHECK-NEXT: vldrw.u32 q0, [r1]
131 ; CHECK-NEXT: mov r1, sp
132 ; CHECK-NEXT: vstrh.32 q0, [r1]
133 ; CHECK-NEXT: ldrd r1, r2, [sp]
134 ; CHECK-NEXT: str r1, [r0]
135 ; CHECK-NEXT: str r2, [r0, #4]
136 ; CHECK-NEXT: add sp, #8
139 %wide.load = load <4 x i32>, <4 x i32>* %src, align 4
140 %0 = trunc <4 x i32> %wide.load to <4 x i16>
141 store <4 x i16> %0, <4 x i16>* %dest, align 1
; Under-aligned sign-extending load, i16 lanes -> i32 lanes.
; Same IR as the align-2 variant of this pattern except that the initial load
; is only 1-byte aligned, i.e. below the 2-byte size of the loaded i16 lanes.
; Expected codegen goes through the stack instead of loading from %src directly:
;   - reserve 8 bytes of stack (.pad / sub sp)
;   - copy the 8 source bytes with two ldr ([r1], [r1, #4]) and one strd to [sp]
;   - vldrh.s32 the sign-extended lanes from the stack slot (r1 = sp)
;   - vstrw.32 the result to [r0]
;   - release the stack slot (add sp)
; NOTE(review): presumably vldrh.s32 is avoided on %src because it needs at
; least halfword alignment - confirm against the MVE lowering code.
; %n is not referenced in the lines visible here.
145 define void @foo_int32_int16_align1(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
146 ; CHECK-LABEL: foo_int32_int16_align1:
147 ; CHECK: @ %bb.0: @ %entry
148 ; CHECK-NEXT: .pad #8
149 ; CHECK-NEXT: sub sp, #8
150 ; CHECK-NEXT: ldr r2, [r1]
151 ; CHECK-NEXT: ldr r1, [r1, #4]
152 ; CHECK-NEXT: strd r2, r1, [sp]
153 ; CHECK-NEXT: mov r1, sp
154 ; CHECK-NEXT: vldrh.s32 q0, [r1]
155 ; CHECK-NEXT: vstrw.32 q0, [r0]
156 ; CHECK-NEXT: add sp, #8
159 %wide.load = load <4 x i16>, <4 x i16>* %src, align 1
160 %0 = sext <4 x i16> %wide.load to <4 x i32>
161 store <4 x i32> %0, <4 x i32>* %dest, align 4
; Under-aligned zero-extending load, i16 lanes -> i32 lanes.
; Same IR as the align-2 variant of this pattern except that the initial load
; is only 1-byte aligned, i.e. below the 2-byte size of the loaded i16 lanes.
; Expected codegen goes through the stack instead of loading from %src directly:
;   - reserve 8 bytes of stack (.pad / sub sp)
;   - copy the 8 source bytes with two ldr ([r1], [r1, #4]) and one strd to [sp]
;   - vldrh.u32 the zero-extended lanes from the stack slot (r1 = sp)
;   - vstrw.32 the result to [r0]
;   - release the stack slot (add sp)
; NOTE(review): presumably vldrh.u32 is avoided on %src because it needs at
; least halfword alignment - confirm against the MVE lowering code.
; %n is not referenced in the lines visible here.
165 define void @foo_uint32_uint16_align1(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
166 ; CHECK-LABEL: foo_uint32_uint16_align1:
167 ; CHECK: @ %bb.0: @ %entry
168 ; CHECK-NEXT: .pad #8
169 ; CHECK-NEXT: sub sp, #8
170 ; CHECK-NEXT: ldr r2, [r1]
171 ; CHECK-NEXT: ldr r1, [r1, #4]
172 ; CHECK-NEXT: strd r2, r1, [sp]
173 ; CHECK-NEXT: mov r1, sp
174 ; CHECK-NEXT: vldrh.u32 q0, [r1]
175 ; CHECK-NEXT: vstrw.32 q0, [r0]
176 ; CHECK-NEXT: add sp, #8
179 %wide.load = load <4 x i16>, <4 x i16>* %src, align 1
180 %0 = zext <4 x i16> %wide.load to <4 x i32>
181 store <4 x i32> %0, <4 x i32>* %dest, align 4