1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
; mla8xi8: computes %C + (%A * %B) element-wise on <8 x i8> vectors.
; The expected output below is a single `mla` that accumulates v0 * v1 into
; v2, followed by `fmov d0, d2` to move the result into d0.
5 define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
6 ; CHECK-LABEL: mla8xi8:
8 ; CHECK-NEXT: mla v2.8b, v0.8b, v1.8b
9 ; CHECK-NEXT: fmov d0, d2
11 %tmp1 = mul <8 x i8> %A, %B;
12 %tmp2 = add <8 x i8> %C, %tmp1;
; mla16xi8: computes %C + (%A * %B) element-wise on <16 x i8> vectors.
; The expected output below is a single `mla` that accumulates v0 * v1 into
; v2, followed by a full-register `mov v0.16b, v2.16b`.
16 define <16 x i8> @mla16xi8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
17 ; CHECK-LABEL: mla16xi8:
19 ; CHECK-NEXT: mla v2.16b, v0.16b, v1.16b
20 ; CHECK-NEXT: mov v0.16b, v2.16b
22 %tmp1 = mul <16 x i8> %A, %B;
23 %tmp2 = add <16 x i8> %C, %tmp1;
; mla4xi16: computes %C + (%A * %B) element-wise on <4 x i16> vectors.
; The expected output below is a single `mla` that accumulates v0 * v1 into
; v2, followed by `fmov d0, d2` to move the result into d0.
27 define <4 x i16> @mla4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) {
28 ; CHECK-LABEL: mla4xi16:
30 ; CHECK-NEXT: mla v2.4h, v0.4h, v1.4h
31 ; CHECK-NEXT: fmov d0, d2
33 %tmp1 = mul <4 x i16> %A, %B;
34 %tmp2 = add <4 x i16> %C, %tmp1;
; mla8xi16: computes %C + (%A * %B) element-wise on <8 x i16> vectors.
; The expected output below is a single `mla` that accumulates v0 * v1 into
; v2, followed by a full-register `mov v0.16b, v2.16b`.
38 define <8 x i16> @mla8xi16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) {
39 ; CHECK-LABEL: mla8xi16:
41 ; CHECK-NEXT: mla v2.8h, v0.8h, v1.8h
42 ; CHECK-NEXT: mov v0.16b, v2.16b
44 %tmp1 = mul <8 x i16> %A, %B;
45 %tmp2 = add <8 x i16> %C, %tmp1;
; mla2xi32: computes %C + (%A * %B) element-wise on <2 x i32> vectors.
; The expected output below is a single `mla` that accumulates v0 * v1 into
; v2, followed by `fmov d0, d2` to move the result into d0.
49 define <2 x i32> @mla2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) {
50 ; CHECK-LABEL: mla2xi32:
52 ; CHECK-NEXT: mla v2.2s, v0.2s, v1.2s
53 ; CHECK-NEXT: fmov d0, d2
55 %tmp1 = mul <2 x i32> %A, %B;
56 %tmp2 = add <2 x i32> %C, %tmp1;
; mla4xi32: computes %C + (%A * %B) element-wise on <4 x i32> vectors.
; The expected output below is a single `mla` that accumulates v0 * v1 into
; v2, followed by a full-register `mov v0.16b, v2.16b`.
60 define <4 x i32> @mla4xi32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
61 ; CHECK-LABEL: mla4xi32:
63 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
64 ; CHECK-NEXT: mov v0.16b, v2.16b
66 %tmp1 = mul <4 x i32> %A, %B;
67 %tmp2 = add <4 x i32> %C, %tmp1;
; mls8xi8: computes %C - (%A * %B) element-wise on <8 x i8> vectors.
; The expected output below is a single `mls` that subtracts v0 * v1 from
; v2, followed by `fmov d0, d2` to move the result into d0.
71 define <8 x i8> @mls8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
72 ; CHECK-LABEL: mls8xi8:
74 ; CHECK-NEXT: mls v2.8b, v0.8b, v1.8b
75 ; CHECK-NEXT: fmov d0, d2
77 %tmp1 = mul <8 x i8> %A, %B;
78 %tmp2 = sub <8 x i8> %C, %tmp1;
; mls16xi8: computes %C - (%A * %B) element-wise on <16 x i8> vectors.
; The expected output below is a single `mls` that subtracts v0 * v1 from
; v2, followed by a full-register `mov v0.16b, v2.16b`.
82 define <16 x i8> @mls16xi8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
83 ; CHECK-LABEL: mls16xi8:
85 ; CHECK-NEXT: mls v2.16b, v0.16b, v1.16b
86 ; CHECK-NEXT: mov v0.16b, v2.16b
88 %tmp1 = mul <16 x i8> %A, %B;
89 %tmp2 = sub <16 x i8> %C, %tmp1;
; mls4xi16: computes %C - (%A * %B) element-wise on <4 x i16> vectors.
; The expected output below is a single `mls` that subtracts v0 * v1 from
; v2, followed by `fmov d0, d2` to move the result into d0.
93 define <4 x i16> @mls4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) {
94 ; CHECK-LABEL: mls4xi16:
96 ; CHECK-NEXT: mls v2.4h, v0.4h, v1.4h
97 ; CHECK-NEXT: fmov d0, d2
99 %tmp1 = mul <4 x i16> %A, %B;
100 %tmp2 = sub <4 x i16> %C, %tmp1;
; mls8xi16: computes %C - (%A * %B) element-wise on <8 x i16> vectors.
; The expected output below is a single `mls` that subtracts v0 * v1 from
; v2, followed by a full-register `mov v0.16b, v2.16b`.
104 define <8 x i16> @mls8xi16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) {
105 ; CHECK-LABEL: mls8xi16:
107 ; CHECK-NEXT: mls v2.8h, v0.8h, v1.8h
108 ; CHECK-NEXT: mov v0.16b, v2.16b
110 %tmp1 = mul <8 x i16> %A, %B;
111 %tmp2 = sub <8 x i16> %C, %tmp1;
; mls2xi32: computes %C - (%A * %B) element-wise on <2 x i32> vectors.
; The expected output below is a single `mls` that subtracts v0 * v1 from
; v2, followed by `fmov d0, d2` to move the result into d0.
115 define <2 x i32> @mls2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) {
116 ; CHECK-LABEL: mls2xi32:
118 ; CHECK-NEXT: mls v2.2s, v0.2s, v1.2s
119 ; CHECK-NEXT: fmov d0, d2
121 %tmp1 = mul <2 x i32> %A, %B;
122 %tmp2 = sub <2 x i32> %C, %tmp1;
; mls4xi32: computes %C - (%A * %B) element-wise on <4 x i32> vectors.
; The expected output below is a single `mls` that subtracts v0 * v1 from
; v2, followed by a full-register `mov v0.16b, v2.16b`.
126 define <4 x i32> @mls4xi32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
127 ; CHECK-LABEL: mls4xi32:
129 ; CHECK-NEXT: mls v2.4s, v0.4s, v1.4s
130 ; CHECK-NEXT: mov v0.16b, v2.16b
132 %tmp1 = mul <4 x i32> %A, %B;
133 %tmp2 = sub <4 x i32> %C, %tmp1;
; mls2v8xi8: computes (%A * %B) - %C element-wise on <8 x i8> vectors.
; Operand order is the reverse of mls8xi8, so `mls` does not apply directly.
; The expected output below instead negates v2 and then uses `mla`, i.e.
; (-C) + A * B, followed by `fmov d0, d2`.
138 define <8 x i8> @mls2v8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
139 ; CHECK-LABEL: mls2v8xi8:
141 ; CHECK-NEXT: neg v2.8b, v2.8b
142 ; CHECK-NEXT: mla v2.8b, v0.8b, v1.8b
143 ; CHECK-NEXT: fmov d0, d2
145 %tmp1 = mul <8 x i8> %A, %B;
146 %tmp2 = sub <8 x i8> %tmp1, %C;
; mls2v16xi8: computes (%A * %B) - %C element-wise on <16 x i8> vectors.
; Operand order is the reverse of mls16xi8, so `mls` does not apply directly.
; The expected output below instead negates v2 and then uses `mla`, i.e.
; (-C) + A * B, followed by `mov v0.16b, v2.16b`.
150 define <16 x i8> @mls2v16xi8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
151 ; CHECK-LABEL: mls2v16xi8:
153 ; CHECK-NEXT: neg v2.16b, v2.16b
154 ; CHECK-NEXT: mla v2.16b, v0.16b, v1.16b
155 ; CHECK-NEXT: mov v0.16b, v2.16b
157 %tmp1 = mul <16 x i8> %A, %B;
158 %tmp2 = sub <16 x i8> %tmp1, %C;
; mls2v4xi16: computes (%A * %B) - %C element-wise on <4 x i16> vectors.
; Operand order is the reverse of mls4xi16, so `mls` does not apply directly.
; The expected output below instead negates v2 and then uses `mla`, i.e.
; (-C) + A * B, followed by `fmov d0, d2`.
162 define <4 x i16> @mls2v4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) {
163 ; CHECK-LABEL: mls2v4xi16:
165 ; CHECK-NEXT: neg v2.4h, v2.4h
166 ; CHECK-NEXT: mla v2.4h, v0.4h, v1.4h
167 ; CHECK-NEXT: fmov d0, d2
169 %tmp1 = mul <4 x i16> %A, %B;
170 %tmp2 = sub <4 x i16> %tmp1, %C;
; mls2v8xi16: computes (%A * %B) - %C element-wise on <8 x i16> vectors.
; Operand order is the reverse of mls8xi16, so `mls` does not apply directly.
; The expected output below instead negates v2 and then uses `mla`, i.e.
; (-C) + A * B, followed by `mov v0.16b, v2.16b`.
174 define <8 x i16> @mls2v8xi16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) {
175 ; CHECK-LABEL: mls2v8xi16:
177 ; CHECK-NEXT: neg v2.8h, v2.8h
178 ; CHECK-NEXT: mla v2.8h, v0.8h, v1.8h
179 ; CHECK-NEXT: mov v0.16b, v2.16b
181 %tmp1 = mul <8 x i16> %A, %B;
182 %tmp2 = sub <8 x i16> %tmp1, %C;
; mls2v2xi32: computes (%A * %B) - %C element-wise on <2 x i32> vectors.
; Operand order is the reverse of mls2xi32, so `mls` does not apply directly.
; The expected output below instead negates v2 and then uses `mla`, i.e.
; (-C) + A * B, followed by `fmov d0, d2`.
186 define <2 x i32> @mls2v2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) {
187 ; CHECK-LABEL: mls2v2xi32:
189 ; CHECK-NEXT: neg v2.2s, v2.2s
190 ; CHECK-NEXT: mla v2.2s, v0.2s, v1.2s
191 ; CHECK-NEXT: fmov d0, d2
193 %tmp1 = mul <2 x i32> %A, %B;
194 %tmp2 = sub <2 x i32> %tmp1, %C;
; mls2v4xi32: computes (%A * %B) - %C element-wise on <4 x i32> vectors.
; Operand order is the reverse of mls4xi32, so `mls` does not apply directly.
; The expected output below instead negates v2 and then uses `mla`, i.e.
; (-C) + A * B, followed by `mov v0.16b, v2.16b`.
198 define <4 x i32> @mls2v4xi32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
199 ; CHECK-LABEL: mls2v4xi32:
201 ; CHECK-NEXT: neg v2.4s, v2.4s
202 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
203 ; CHECK-NEXT: mov v0.16b, v2.16b
205 %tmp1 = mul <4 x i32> %A, %B;
206 %tmp2 = sub <4 x i32> %tmp1, %C;