1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -verify-machineinstrs -mattr=+mve.fp %s -o - | FileCheck %s
; Adds a splat of the i32 scalar %src2 to the <4 x i32> vector %src.
; The expected output starts with a single `vadd.i32 q0, q0, r0`, i.e. the add
; takes its second operand from a general-purpose register.
; %a and %b are not referenced in the lines shown here.
4 define arm_aapcs_vfpcc <4 x i32> @vaddqr_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) {
5 ; CHECK-LABEL: vaddqr_v4i32:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vadd.i32 q0, q0, r0
; Splat idiom: place %src2 in lane 0, then shuffle with an all-zero mask so
; every result lane reads lane 0.
10 %i = insertelement <4 x i32> undef, i32 %src2, i32 0
11 %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
; Vector operand first, splat second.
12 %c = add <4 x i32> %src, %sp
; Adds a splat of the i16 scalar %src2 to the <8 x i16> vector %src.
; The expected output starts with a single `vadd.i16 q0, q0, r0`.
; %a and %b are not referenced in the lines shown here.
16 define arm_aapcs_vfpcc <8 x i16> @vaddqr_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) {
17 ; CHECK-LABEL: vaddqr_v8i16:
18 ; CHECK: @ %bb.0: @ %entry
19 ; CHECK-NEXT: vadd.i16 q0, q0, r0
; Splat idiom: place %src2 in lane 0, then shuffle with an all-zero mask so
; every result lane reads lane 0.
22 %i = insertelement <8 x i16> undef, i16 %src2, i32 0
23 %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
; Vector operand first, splat second.
24 %c = add <8 x i16> %src, %sp
; Adds a splat of the i8 scalar %src2 to the <16 x i8> vector %src.
; The expected output starts with a single `vadd.i8 q0, q0, r0`.
; %a and %b are not referenced in the lines shown here.
28 define arm_aapcs_vfpcc <16 x i8> @vaddqr_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) {
29 ; CHECK-LABEL: vaddqr_v16i8:
30 ; CHECK: @ %bb.0: @ %entry
31 ; CHECK-NEXT: vadd.i8 q0, q0, r0
; Splat idiom: place %src2 in lane 0, then shuffle with an all-zero mask so
; every result lane reads lane 0.
34 %i = insertelement <16 x i8> undef, i8 %src2, i32 0
35 %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
; Vector operand first, splat second.
36 %c = add <16 x i8> %src, %sp
; Commuted form of the <4 x i32> splat add: the splat of %src2 is the FIRST
; operand of the add and %src is the second.
; The expected output is the same `vadd.i32 q0, q0, r0` as the non-commuted
; test, so operand order must not prevent the scalar-register form.
; %a and %b are not referenced in the lines shown here.
40 define arm_aapcs_vfpcc <4 x i32> @vaddqr_v4i32_2(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) {
41 ; CHECK-LABEL: vaddqr_v4i32_2:
42 ; CHECK: @ %bb.0: @ %entry
43 ; CHECK-NEXT: vadd.i32 q0, q0, r0
; Splat idiom: place %src2 in lane 0, then shuffle with an all-zero mask so
; every result lane reads lane 0.
46 %i = insertelement <4 x i32> undef, i32 %src2, i32 0
47 %sp = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
; Splat first, vector operand second.
48 %c = add <4 x i32> %sp, %src
; Commuted form of the <8 x i16> splat add: the splat of %src2 is the FIRST
; operand of the add and %src is the second.
; The expected output is still `vadd.i16 q0, q0, r0`.
; %a and %b are not referenced in the lines shown here.
52 define arm_aapcs_vfpcc <8 x i16> @vaddqr_v8i16_2(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) {
53 ; CHECK-LABEL: vaddqr_v8i16_2:
54 ; CHECK: @ %bb.0: @ %entry
55 ; CHECK-NEXT: vadd.i16 q0, q0, r0
; Splat idiom: place %src2 in lane 0, then shuffle with an all-zero mask so
; every result lane reads lane 0.
58 %i = insertelement <8 x i16> undef, i16 %src2, i32 0
59 %sp = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
; Splat first, vector operand second.
60 %c = add <8 x i16> %sp, %src
; Commuted form of the <16 x i8> splat add: the splat of %src2 is the FIRST
; operand of the add and %src is the second.
; The expected output is still `vadd.i8 q0, q0, r0`.
; %a and %b are not referenced in the lines shown here.
64 define arm_aapcs_vfpcc <16 x i8> @vaddqr_v16i8_2(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) {
65 ; CHECK-LABEL: vaddqr_v16i8_2:
66 ; CHECK: @ %bb.0: @ %entry
67 ; CHECK-NEXT: vadd.i8 q0, q0, r0
; Splat idiom: place %src2 in lane 0, then shuffle with an all-zero mask so
; every result lane reads lane 0.
70 %i = insertelement <16 x i8> undef, i8 %src2, i32 0
71 %sp = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
; Splat first, vector operand second.
72 %c = add <16 x i8> %sp, %src
; Floating-point variant: adds a splat of the float scalar %src2 to the
; <4 x float> vector %src with fadd.
; The expected output is a move from s4 into r0 followed by
; `vadd.f32 q0, q0, r0`.
; %a and %b are not referenced in the lines shown here.
76 define arm_aapcs_vfpcc <4 x float> @vaddqr_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
77 ; CHECK-LABEL: vaddqr_v4f32:
78 ; CHECK: @ %bb.0: @ %entry
79 ; CHECK-NEXT: vmov r0, s4
80 ; CHECK-NEXT: vadd.f32 q0, q0, r0
; Splat idiom: place %src2 in lane 0, then shuffle with an all-zero mask so
; every result lane reads lane 0.
83 %i = insertelement <4 x float> undef, float %src2, i32 0
84 %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
; Vector operand first, splat second.
85 %c = fadd <4 x float> %src, %sp
; Half-precision variant: adds a splat of the half scalar %src2 to the
; <8 x half> vector %src with fadd.
; The expected output is `vmov.f16 r0, s4` followed by `vadd.f16 q0, q0, r0`.
; %a and %b are not referenced in the lines shown here.
89 define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
90 ; CHECK-LABEL: vaddqr_v8f16:
91 ; CHECK: @ %bb.0: @ %entry
92 ; CHECK-NEXT: vmov.f16 r0, s4
93 ; CHECK-NEXT: vadd.f16 q0, q0, r0
; Splat idiom: place %src2 in lane 0, then shuffle with an all-zero mask so
; every result lane reads lane 0.
96 %i = insertelement <8 x half> undef, half %src2, i32 0
97 %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
; Vector operand first, splat second.
98 %c = fadd <8 x half> %src, %sp
; Commuted form of the <4 x float> splat fadd: the splat of %src2 is the FIRST
; operand and %src is the second.
; The expected output matches the non-commuted test: a move from s4 into r0
; followed by `vadd.f32 q0, q0, r0`.
; %a and %b are not referenced in the lines shown here.
102 define arm_aapcs_vfpcc <4 x float> @vaddqr_v4f32_2(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
103 ; CHECK-LABEL: vaddqr_v4f32_2:
104 ; CHECK: @ %bb.0: @ %entry
105 ; CHECK-NEXT: vmov r0, s4
106 ; CHECK-NEXT: vadd.f32 q0, q0, r0
; Splat idiom: place %src2 in lane 0, then shuffle with an all-zero mask so
; every result lane reads lane 0.
109 %i = insertelement <4 x float> undef, float %src2, i32 0
110 %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
; Splat first, vector operand second.
111 %c = fadd <4 x float> %sp, %src
; Commuted form of the <8 x half> splat fadd: the splat of %src2 is the FIRST
; operand and %src is the second.
; The expected output is still `vmov.f16 r0, s4` followed by
; `vadd.f16 q0, q0, r0`.
; %a and %b are not referenced in the lines shown here.
115 define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_2(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
116 ; CHECK-LABEL: vaddqr_v8f16_2:
117 ; CHECK: @ %bb.0: @ %entry
118 ; CHECK-NEXT: vmov.f16 r0, s4
119 ; CHECK-NEXT: vadd.f16 q0, q0, r0
; Splat idiom: place %src2 in lane 0, then shuffle with an all-zero mask so
; every result lane reads lane 0.
122 %i = insertelement <8 x half> undef, half %src2, i32 0
123 %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
; Splat first, vector operand second.
124 %c = fadd <8 x half> %sp, %src
; Bitcast variant of the <4 x float> splat fadd: the float scalar %src2 is
; reinterpreted as i32, splatted as an integer vector, and the splat is then
; reinterpreted back to <4 x float> before the fadd.
; The expected output is the same as for the direct float splat: a move from
; s4 into r0 followed by `vadd.f32 q0, q0, r0`.
; %a and %b are not referenced in the lines shown here.
128 define arm_aapcs_vfpcc <4 x float> @vaddqr_v4f32_3(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
129 ; CHECK-LABEL: vaddqr_v4f32_3:
130 ; CHECK: @ %bb.0: @ %entry
131 ; CHECK-NEXT: vmov r0, s4
132 ; CHECK-NEXT: vadd.f32 q0, q0, r0
; Reinterpret the scalar's bits as an integer of the same width.
135 %src2bc = bitcast float %src2 to i32
; Integer splat: lane 0 insert followed by an all-zero shuffle mask.
136 %i = insertelement <4 x i32> undef, i32 %src2bc, i32 0
137 %spbc = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
; Reinterpret the integer splat as a float vector for the fadd.
138 %sp = bitcast <4 x i32> %spbc to <4 x float>
; Vector operand first, splat second.
139 %c = fadd <4 x float> %src, %sp
; Bitcast variant of the <8 x half> splat fadd: the half scalar %src2 is
; reinterpreted as i16, splatted as an integer vector, and the splat is then
; reinterpreted back to <8 x half> before the fadd.
; The expected output is the same as for the direct half splat:
; `vmov.f16 r0, s4` followed by `vadd.f16 q0, q0, r0`.
; %a and %b are not referenced in the lines shown here.
143 define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_3(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
144 ; CHECK-LABEL: vaddqr_v8f16_3:
145 ; CHECK: @ %bb.0: @ %entry
146 ; CHECK-NEXT: vmov.f16 r0, s4
147 ; CHECK-NEXT: vadd.f16 q0, q0, r0
; Reinterpret the scalar's bits as an integer of the same width.
150 %src2bc = bitcast half %src2 to i16
; Integer splat: lane 0 insert followed by an all-zero shuffle mask.
151 %i = insertelement <8 x i16> undef, i16 %src2bc, i32 0
152 %spbc = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
; Reinterpret the integer splat as a half vector for the fadd.
153 %sp = bitcast <8 x i16> %spbc to <8 x half>
; Vector operand first, splat second.
154 %c = fadd <8 x half> %src, %sp
; Commuted bitcast variant of the <4 x float> splat fadd: the scalar is
; splatted through an i32 vector and bitcast back to <4 x float>, and that
; splat is the FIRST operand of the fadd.
; The expected output is unchanged: a move from s4 into r0 followed by
; `vadd.f32 q0, q0, r0`.
; %a and %b are not referenced in the lines shown here.
158 define arm_aapcs_vfpcc <4 x float> @vaddqr_v4f32_4(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
159 ; CHECK-LABEL: vaddqr_v4f32_4:
160 ; CHECK: @ %bb.0: @ %entry
161 ; CHECK-NEXT: vmov r0, s4
162 ; CHECK-NEXT: vadd.f32 q0, q0, r0
; Reinterpret the scalar's bits as an integer of the same width.
165 %src2bc = bitcast float %src2 to i32
; Integer splat: lane 0 insert followed by an all-zero shuffle mask.
166 %i = insertelement <4 x i32> undef, i32 %src2bc, i32 0
167 %spbc = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
; Reinterpret the integer splat as a float vector for the fadd.
168 %sp = bitcast <4 x i32> %spbc to <4 x float>
; Splat first, vector operand second.
169 %c = fadd <4 x float> %sp, %src
; Commuted bitcast variant of the <8 x half> splat fadd: the scalar is
; splatted through an i16 vector and bitcast back to <8 x half>, and that
; splat is the FIRST operand of the fadd.
; The expected output is unchanged: `vmov.f16 r0, s4` followed by
; `vadd.f16 q0, q0, r0`.
; %a and %b are not referenced in the lines shown here.
173 define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_4(<8 x half> %src, half %src2, <8 x half> %a, <8 x half> %b) {
174 ; CHECK-LABEL: vaddqr_v8f16_4:
175 ; CHECK: @ %bb.0: @ %entry
176 ; CHECK-NEXT: vmov.f16 r0, s4
177 ; CHECK-NEXT: vadd.f16 q0, q0, r0
; Reinterpret the scalar's bits as an integer of the same width.
180 %src2bc = bitcast half %src2 to i16
; Integer splat: lane 0 insert followed by an all-zero shuffle mask.
181 %i = insertelement <8 x i16> undef, i16 %src2bc, i32 0
182 %spbc = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
; Reinterpret the integer splat as a half vector for the fadd.
183 %sp = bitcast <8 x i16> %spbc to <8 x half>
; Splat first, vector operand second.
184 %c = fadd <8 x half> %sp, %src