1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple aarch64-unknown-linux-gnu | FileCheck %s --check-prefixes=CHECK-SD
3 ; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK-GI
5 ; Tests for wider-than-legal extensions into mul/mla.
; mul_i16: zero-extends both <16 x i8> arguments to <16 x i16> and multiplies
; them. Both check prefixes (CHECK-SD for the default llc run, CHECK-GI for the
; -global-isel run) expect a umull/umull2 pair covering the low and high byte
; halves plus a single register mov; only instruction order and register
; assignment differ between the two sequences.
7 define <16 x i16> @mul_i16(<16 x i8> %a, <16 x i8> %b) {
8 ; CHECK-SD-LABEL: mul_i16:
9 ; CHECK-SD: // %bb.0: // %entry
10 ; CHECK-SD-NEXT: umull2 v2.8h, v0.16b, v1.16b
11 ; CHECK-SD-NEXT: umull v0.8h, v0.8b, v1.8b
12 ; CHECK-SD-NEXT: mov v1.16b, v2.16b
15 ; CHECK-GI-LABEL: mul_i16:
16 ; CHECK-GI: // %bb.0: // %entry
17 ; CHECK-GI-NEXT: umull v2.8h, v0.8b, v1.8b
18 ; CHECK-GI-NEXT: umull2 v1.8h, v0.16b, v1.16b
19 ; CHECK-GI-NEXT: mov v0.16b, v2.16b
; Both operands are widened with zext before the multiply, so the mul itself
; is performed in the <16 x i16> type.
22 %ea = zext <16 x i8> %a to <16 x i16>
23 %eb = zext <16 x i8> %b to <16 x i16>
24 %m = mul <16 x i16> %ea, %eb
; mul_i32: zero-extends both <16 x i8> arguments to <16 x i32> and multiplies
; them.
;  - CHECK-SD (default llc run) expects the multiply to be done as
;    umull/umull2 producing .8h results, which are then widened to .4s with
;    ushll/ushll2 #0.
;  - CHECK-GI (-global-isel run) expects the operands to be widened to .8h
;    with ushll/ushll2 #0 first, followed by umull/umull2 producing .4s.
28 define <16 x i32> @mul_i32(<16 x i8> %a, <16 x i8> %b) {
29 ; CHECK-SD-LABEL: mul_i32:
30 ; CHECK-SD: // %bb.0: // %entry
31 ; CHECK-SD-NEXT: umull v2.8h, v0.8b, v1.8b
32 ; CHECK-SD-NEXT: umull2 v4.8h, v0.16b, v1.16b
33 ; CHECK-SD-NEXT: ushll v0.4s, v2.4h, #0
34 ; CHECK-SD-NEXT: ushll2 v3.4s, v4.8h, #0
35 ; CHECK-SD-NEXT: ushll2 v1.4s, v2.8h, #0
36 ; CHECK-SD-NEXT: ushll v2.4s, v4.4h, #0
39 ; CHECK-GI-LABEL: mul_i32:
40 ; CHECK-GI: // %bb.0: // %entry
41 ; CHECK-GI-NEXT: ushll v2.8h, v0.8b, #0
42 ; CHECK-GI-NEXT: ushll v3.8h, v1.8b, #0
43 ; CHECK-GI-NEXT: ushll2 v4.8h, v0.16b, #0
44 ; CHECK-GI-NEXT: ushll2 v5.8h, v1.16b, #0
45 ; CHECK-GI-NEXT: umull v0.4s, v2.4h, v3.4h
46 ; CHECK-GI-NEXT: umull2 v1.4s, v2.8h, v3.8h
47 ; CHECK-GI-NEXT: umull v2.4s, v4.4h, v5.4h
48 ; CHECK-GI-NEXT: umull2 v3.4s, v4.8h, v5.8h
; i8 -> i32 zero-extension of both operands, then a <16 x i32> multiply.
51 %ea = zext <16 x i8> %a to <16 x i32>
52 %eb = zext <16 x i8> %b to <16 x i32>
53 %m = mul <16 x i32> %ea, %eb
; mul_i64: zero-extends both <16 x i8> arguments to <16 x i64> and multiplies
; them.
;  - CHECK-SD (default llc run) expects umull/umull2 producing .8h products,
;    followed by two rounds of ushll/ushll2 #0 widening (.8h -> .4s -> .2d).
;  - CHECK-GI (-global-isel run) expects both operands to go through two
;    rounds of ushll/ushll2 #0 widening (.16b -> .8h -> .4s) before
;    umull/umull2 produce the .2d results.
57 define <16 x i64> @mul_i64(<16 x i8> %a, <16 x i8> %b) {
58 ; CHECK-SD-LABEL: mul_i64:
59 ; CHECK-SD: // %bb.0: // %entry
60 ; CHECK-SD-NEXT: umull v2.8h, v0.8b, v1.8b
61 ; CHECK-SD-NEXT: umull2 v0.8h, v0.16b, v1.16b
62 ; CHECK-SD-NEXT: ushll v3.4s, v2.4h, #0
63 ; CHECK-SD-NEXT: ushll2 v2.4s, v2.8h, #0
64 ; CHECK-SD-NEXT: ushll v5.4s, v0.4h, #0
65 ; CHECK-SD-NEXT: ushll2 v6.4s, v0.8h, #0
66 ; CHECK-SD-NEXT: ushll2 v1.2d, v3.4s, #0
67 ; CHECK-SD-NEXT: ushll v0.2d, v3.2s, #0
68 ; CHECK-SD-NEXT: ushll2 v3.2d, v2.4s, #0
69 ; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
70 ; CHECK-SD-NEXT: ushll v4.2d, v5.2s, #0
71 ; CHECK-SD-NEXT: ushll2 v7.2d, v6.4s, #0
72 ; CHECK-SD-NEXT: ushll2 v5.2d, v5.4s, #0
73 ; CHECK-SD-NEXT: ushll v6.2d, v6.2s, #0
76 ; CHECK-GI-LABEL: mul_i64:
77 ; CHECK-GI: // %bb.0: // %entry
78 ; CHECK-GI-NEXT: ushll v2.8h, v0.8b, #0
79 ; CHECK-GI-NEXT: ushll v3.8h, v1.8b, #0
80 ; CHECK-GI-NEXT: ushll2 v0.8h, v0.16b, #0
81 ; CHECK-GI-NEXT: ushll2 v1.8h, v1.16b, #0
82 ; CHECK-GI-NEXT: ushll v4.4s, v2.4h, #0
83 ; CHECK-GI-NEXT: ushll2 v5.4s, v2.8h, #0
84 ; CHECK-GI-NEXT: ushll v2.4s, v3.4h, #0
85 ; CHECK-GI-NEXT: ushll v6.4s, v0.4h, #0
86 ; CHECK-GI-NEXT: ushll2 v3.4s, v3.8h, #0
87 ; CHECK-GI-NEXT: ushll v7.4s, v1.4h, #0
88 ; CHECK-GI-NEXT: ushll2 v16.4s, v0.8h, #0
89 ; CHECK-GI-NEXT: ushll2 v17.4s, v1.8h, #0
90 ; CHECK-GI-NEXT: umull v0.2d, v4.2s, v2.2s
91 ; CHECK-GI-NEXT: umull2 v1.2d, v4.4s, v2.4s
92 ; CHECK-GI-NEXT: umull v2.2d, v5.2s, v3.2s
93 ; CHECK-GI-NEXT: umull2 v3.2d, v5.4s, v3.4s
94 ; CHECK-GI-NEXT: umull v4.2d, v6.2s, v7.2s
95 ; CHECK-GI-NEXT: umull2 v5.2d, v6.4s, v7.4s
96 ; CHECK-GI-NEXT: umull v6.2d, v16.2s, v17.2s
97 ; CHECK-GI-NEXT: umull2 v7.2d, v16.4s, v17.4s
; i8 -> i64 zero-extension of both operands, then a <16 x i64> multiply.
100 %ea = zext <16 x i8> %a to <16 x i64>
101 %eb = zext <16 x i8> %b to <16 x i64>
102 %m = mul <16 x i64> %ea, %eb
; mla_i16: multiply-accumulate variant of the i16 case. Both <16 x i8>
; arguments are zero-extended to <16 x i16>, multiplied, and the product is
; added to the <16 x i16> argument %c. Both check prefixes (CHECK-SD for the
; default llc run, CHECK-GI for the -global-isel run) expect a umlal/umlal2
; pair accumulating into v2/v3, followed by movs of v2/v3 into v0/v1.
107 define <16 x i16> @mla_i16(<16 x i8> %a, <16 x i8> %b, <16 x i16> %c) {
108 ; CHECK-SD-LABEL: mla_i16:
109 ; CHECK-SD: // %bb.0: // %entry
110 ; CHECK-SD-NEXT: umlal2 v3.8h, v0.16b, v1.16b
111 ; CHECK-SD-NEXT: umlal v2.8h, v0.8b, v1.8b
112 ; CHECK-SD-NEXT: mov v0.16b, v2.16b
113 ; CHECK-SD-NEXT: mov v1.16b, v3.16b
116 ; CHECK-GI-LABEL: mla_i16:
117 ; CHECK-GI: // %bb.0: // %entry
118 ; CHECK-GI-NEXT: umlal v2.8h, v0.8b, v1.8b
119 ; CHECK-GI-NEXT: umlal2 v3.8h, v0.16b, v1.16b
120 ; CHECK-GI-NEXT: mov v0.16b, v2.16b
121 ; CHECK-GI-NEXT: mov v1.16b, v3.16b
; zext both operands, multiply, then add the accumulator %c.
124 %ea = zext <16 x i8> %a to <16 x i16>
125 %eb = zext <16 x i8> %b to <16 x i16>
126 %m = mul <16 x i16> %ea, %eb
127 %d = add <16 x i16> %m, %c
; mla_i32: multiply-accumulate variant of the i32 case. Both <16 x i8>
; arguments are zero-extended to <16 x i32>, multiplied, and the product is
; added to the <16 x i32> argument %c.
;  - CHECK-SD (default llc run) expects umull/umull2 producing .8h products,
;    which are then added to the .4s accumulator registers with uaddw/uaddw2,
;    plus one trailing mov.
;  - CHECK-GI (-global-isel run) expects the operands to be widened to .8h
;    with ushll/ushll2 #0, then umlal/umlal2 accumulating into v2-v5, followed
;    by four movs of v2-v5 into v0-v3.
131 define <16 x i32> @mla_i32(<16 x i8> %a, <16 x i8> %b, <16 x i32> %c) {
132 ; CHECK-SD-LABEL: mla_i32:
133 ; CHECK-SD: // %bb.0: // %entry
134 ; CHECK-SD-NEXT: umull2 v7.8h, v0.16b, v1.16b
135 ; CHECK-SD-NEXT: umull v6.8h, v0.8b, v1.8b
136 ; CHECK-SD-NEXT: uaddw2 v5.4s, v5.4s, v7.8h
137 ; CHECK-SD-NEXT: uaddw v0.4s, v2.4s, v6.4h
138 ; CHECK-SD-NEXT: uaddw2 v1.4s, v3.4s, v6.8h
139 ; CHECK-SD-NEXT: uaddw v2.4s, v4.4s, v7.4h
140 ; CHECK-SD-NEXT: mov v3.16b, v5.16b
143 ; CHECK-GI-LABEL: mla_i32:
144 ; CHECK-GI: // %bb.0: // %entry
145 ; CHECK-GI-NEXT: ushll v6.8h, v0.8b, #0
146 ; CHECK-GI-NEXT: ushll v7.8h, v1.8b, #0
147 ; CHECK-GI-NEXT: ushll2 v0.8h, v0.16b, #0
148 ; CHECK-GI-NEXT: ushll2 v1.8h, v1.16b, #0
149 ; CHECK-GI-NEXT: umlal v2.4s, v6.4h, v7.4h
150 ; CHECK-GI-NEXT: umlal2 v3.4s, v6.8h, v7.8h
151 ; CHECK-GI-NEXT: umlal v4.4s, v0.4h, v1.4h
152 ; CHECK-GI-NEXT: umlal2 v5.4s, v0.8h, v1.8h
153 ; CHECK-GI-NEXT: mov v0.16b, v2.16b
154 ; CHECK-GI-NEXT: mov v1.16b, v3.16b
155 ; CHECK-GI-NEXT: mov v2.16b, v4.16b
156 ; CHECK-GI-NEXT: mov v3.16b, v5.16b
; zext both operands to i32 lanes, multiply, then add the accumulator %c.
159 %ea = zext <16 x i8> %a to <16 x i32>
160 %eb = zext <16 x i8> %b to <16 x i32>
161 %m = mul <16 x i32> %ea, %eb
162 %d = add <16 x i32> %m, %c
; mla_i64: multiply-accumulate variant of the i64 case. Both <16 x i8>
; arguments are zero-extended to <16 x i64>, multiplied, and the product is
; added to the <16 x i64> argument %c.
;  - CHECK-SD (default llc run) expects umull/umull2 producing .8h products,
;    one round of ushll/ushll2 #0 widening to .4s, and then uaddw/uaddw2 adding
;    those into the .2d accumulator values.
;  - CHECK-GI (-global-isel run) expects the operands to be widened twice with
;    ushll/ushll2 #0 (to .8h, then .4s) and then umlal/umlal2 accumulating
;    into .2d registers, with movs shuffling the accumulators before and after.
; Both sequences load two q registers from [sp] with ldp and use them as
; accumulator inputs (presumably the part of %c that is not passed in vector
; registers -- confirm against the calling convention).
166 define <16 x i64> @mla_i64(<16 x i8> %a, <16 x i8> %b, <16 x i64> %c) {
167 ; CHECK-SD-LABEL: mla_i64:
168 ; CHECK-SD: // %bb.0: // %entry
169 ; CHECK-SD-NEXT: umull v16.8h, v0.8b, v1.8b
170 ; CHECK-SD-NEXT: umull2 v0.8h, v0.16b, v1.16b
171 ; CHECK-SD-NEXT: ldp q20, q21, [sp]
172 ; CHECK-SD-NEXT: ushll v17.4s, v16.4h, #0
173 ; CHECK-SD-NEXT: ushll2 v16.4s, v16.8h, #0
174 ; CHECK-SD-NEXT: ushll2 v19.4s, v0.8h, #0
175 ; CHECK-SD-NEXT: ushll v18.4s, v0.4h, #0
176 ; CHECK-SD-NEXT: uaddw2 v1.2d, v3.2d, v17.4s
177 ; CHECK-SD-NEXT: uaddw v0.2d, v2.2d, v17.2s
178 ; CHECK-SD-NEXT: uaddw2 v3.2d, v5.2d, v16.4s
179 ; CHECK-SD-NEXT: uaddw v2.2d, v4.2d, v16.2s
180 ; CHECK-SD-NEXT: uaddw2 v16.2d, v21.2d, v19.4s
181 ; CHECK-SD-NEXT: uaddw v4.2d, v6.2d, v18.2s
182 ; CHECK-SD-NEXT: uaddw2 v5.2d, v7.2d, v18.4s
183 ; CHECK-SD-NEXT: uaddw v6.2d, v20.2d, v19.2s
184 ; CHECK-SD-NEXT: mov v7.16b, v16.16b
187 ; CHECK-GI-LABEL: mla_i64:
188 ; CHECK-GI: // %bb.0: // %entry
189 ; CHECK-GI-NEXT: mov v16.16b, v2.16b
190 ; CHECK-GI-NEXT: mov v17.16b, v3.16b
191 ; CHECK-GI-NEXT: mov v2.16b, v4.16b
192 ; CHECK-GI-NEXT: mov v3.16b, v5.16b
193 ; CHECK-GI-NEXT: mov v4.16b, v6.16b
194 ; CHECK-GI-NEXT: mov v5.16b, v7.16b
195 ; CHECK-GI-NEXT: ushll v6.8h, v0.8b, #0
196 ; CHECK-GI-NEXT: ushll v7.8h, v1.8b, #0
197 ; CHECK-GI-NEXT: ushll2 v0.8h, v0.16b, #0
198 ; CHECK-GI-NEXT: ushll2 v1.8h, v1.16b, #0
199 ; CHECK-GI-NEXT: ushll v18.4s, v6.4h, #0
200 ; CHECK-GI-NEXT: ushll v20.4s, v7.4h, #0
201 ; CHECK-GI-NEXT: ushll2 v19.4s, v6.8h, #0
202 ; CHECK-GI-NEXT: ushll v21.4s, v0.4h, #0
203 ; CHECK-GI-NEXT: ushll2 v22.4s, v7.8h, #0
204 ; CHECK-GI-NEXT: ushll v23.4s, v1.4h, #0
205 ; CHECK-GI-NEXT: ldp q6, q7, [sp]
206 ; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
207 ; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
208 ; CHECK-GI-NEXT: umlal v16.2d, v18.2s, v20.2s
209 ; CHECK-GI-NEXT: umlal2 v17.2d, v18.4s, v20.4s
210 ; CHECK-GI-NEXT: umlal v2.2d, v19.2s, v22.2s
211 ; CHECK-GI-NEXT: umlal2 v3.2d, v19.4s, v22.4s
212 ; CHECK-GI-NEXT: umlal v4.2d, v21.2s, v23.2s
213 ; CHECK-GI-NEXT: umlal2 v5.2d, v21.4s, v23.4s
214 ; CHECK-GI-NEXT: umlal v6.2d, v0.2s, v1.2s
215 ; CHECK-GI-NEXT: umlal2 v7.2d, v0.4s, v1.4s
216 ; CHECK-GI-NEXT: mov v0.16b, v16.16b
217 ; CHECK-GI-NEXT: mov v1.16b, v17.16b
; zext both operands to i64 lanes, multiply, then add the accumulator %c.
220 %ea = zext <16 x i8> %a to <16 x i64>
221 %eb = zext <16 x i8> %b to <16 x i64>
222 %m = mul <16 x i64> %ea, %eb
223 %d = add <16 x i64> %m, %c