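// NOTE: the next three lines are web-viewer (gitweb) page residue, not part of the test.
// Latest commit shown by the viewer: [AMDGPU][True16][CodeGen] true16 codegen pattern for v_med3_u/i16 (#121850)
// Path: [llvm-project.git] / llvm / test / MC / AArch64 / neon-2velem.s
// blob ed55ad0b13633696e61c41e7421cdb9bae345f48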
// RUN: llvm-mc -triple=arm64 -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s

// Check that the assembler can handle the documented syntax for AArch64

//------------------------------------------------------------------------------
// Instructions with 2 vectors and an element
//------------------------------------------------------------------------------

// MLA (by element), 32-bit lanes: .2s and .4s arrangements, with the element
// operand taken from v2 and v22 at indices 1, 2 and 3. Each instruction must be
// printed back unchanged with the encoding listed in the CHECK lines.
        mla v0.2s, v1.2s, v2.s[2]
        mla v0.2s, v1.2s, v22.s[2]
        mla v3.4s, v8.4s, v2.s[1]
        mla v3.4s, v8.4s, v22.s[3]

// CHECK: mla v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x08,0x82,0x2f]
// CHECK: mla v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x08,0x96,0x2f]
// CHECK: mla v3.4s, v8.4s, v2.s[1] // encoding: [0x03,0x01,0xa2,0x6f]
// CHECK: mla v3.4s, v8.4s, v22.s[3] // encoding: [0x03,0x09,0xb6,0x6f]
// MLA (by element), 16-bit lanes: .4h and .8h arrangements, with the element
// operand taken from v2, v15 and v14 at indices 2, 7 and 6.
        mla v0.4h, v1.4h, v2.h[2]
        mla v0.4h, v1.4h, v15.h[2]
        mla v0.8h, v1.8h, v2.h[7]
        mla v0.8h, v1.8h, v14.h[6]

// CHECK: mla v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0x00,0x62,0x2f]
// CHECK: mla v0.4h, v1.4h, v15.h[2] // encoding: [0x20,0x00,0x6f,0x2f]
// CHECK: mla v0.8h, v1.8h, v2.h[7] // encoding: [0x20,0x08,0x72,0x6f]
// CHECK: mla v0.8h, v1.8h, v14.h[6] // encoding: [0x20,0x08,0x6e,0x6f]
// MLS (by element), 32-bit lanes: same operand combinations as the MLA .s
// group (v2/v22 elements, indices 1, 2 and 3), with the MLS encodings.
        mls v0.2s, v1.2s, v2.s[2]
        mls v0.2s, v1.2s, v22.s[2]
        mls v3.4s, v8.4s, v2.s[1]
        mls v3.4s, v8.4s, v22.s[3]

// CHECK: mls v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x48,0x82,0x2f]
// CHECK: mls v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x48,0x96,0x2f]
// CHECK: mls v3.4s, v8.4s, v2.s[1] // encoding: [0x03,0x41,0xa2,0x6f]
// CHECK: mls v3.4s, v8.4s, v22.s[3] // encoding: [0x03,0x49,0xb6,0x6f]
// MLS (by element), 16-bit lanes: .4h and .8h arrangements, with the element
// operand taken from v2, v15 and v14 at indices 2, 7 and 6.
        mls v0.4h, v1.4h, v2.h[2]
        mls v0.4h, v1.4h, v15.h[2]
        mls v0.8h, v1.8h, v2.h[7]
        mls v0.8h, v1.8h, v14.h[6]

// CHECK: mls v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0x40,0x62,0x2f]
// CHECK: mls v0.4h, v1.4h, v15.h[2] // encoding: [0x20,0x40,0x6f,0x2f]
// CHECK: mls v0.8h, v1.8h, v2.h[7] // encoding: [0x20,0x48,0x72,0x6f]
// CHECK: mls v0.8h, v1.8h, v14.h[6] // encoding: [0x20,0x48,0x6e,0x6f]
// FMLA (by element): half (.4h/.8h), single (.2s/.4s) and double (.2d)
// precision arrangements. The RUN line enables +fullfp16, which the .h forms
// are assembled under. Element operands come from v2 and v22.
        fmla v0.4h, v1.4h, v2.h[2]
        fmla v3.8h, v8.8h, v2.h[1]
        fmla v0.2s, v1.2s, v2.s[2]
        fmla v0.2s, v1.2s, v22.s[2]
        fmla v3.4s, v8.4s, v2.s[1]
        fmla v3.4s, v8.4s, v22.s[3]
        fmla v0.2d, v1.2d, v2.d[1]
        fmla v0.2d, v1.2d, v22.d[1]

// CHECK: fmla v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0x10,0x22,0x0f]
// CHECK: fmla v3.8h, v8.8h, v2.h[1] // encoding: [0x03,0x11,0x12,0x4f]
// CHECK: fmla v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x18,0x82,0x0f]
// CHECK: fmla v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x18,0x96,0x0f]
// CHECK: fmla v3.4s, v8.4s, v2.s[1] // encoding: [0x03,0x11,0xa2,0x4f]
// CHECK: fmla v3.4s, v8.4s, v22.s[3] // encoding: [0x03,0x19,0xb6,0x4f]
// CHECK: fmla v0.2d, v1.2d, v2.d[1] // encoding: [0x20,0x18,0xc2,0x4f]
// CHECK: fmla v0.2d, v1.2d, v22.d[1] // encoding: [0x20,0x18,0xd6,0x4f]
// FMLS (by element): half (.4h/.8h), single (.2s/.4s) and double (.2d)
// precision arrangements, using the same operand combinations as the FMLA
// group. The RUN line enables +fullfp16, which the .h forms are assembled under.
        fmls v0.4h, v1.4h, v2.h[2]
        fmls v3.8h, v8.8h, v2.h[1]
        fmls v0.2s, v1.2s, v2.s[2]
        fmls v0.2s, v1.2s, v22.s[2]
        fmls v3.4s, v8.4s, v2.s[1]
        fmls v3.4s, v8.4s, v22.s[3]
        fmls v0.2d, v1.2d, v2.d[1]
        fmls v0.2d, v1.2d, v22.d[1]

// CHECK: fmls v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0x50,0x22,0x0f]
// CHECK: fmls v3.8h, v8.8h, v2.h[1] // encoding: [0x03,0x51,0x12,0x4f]
// CHECK: fmls v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x58,0x82,0x0f]
// CHECK: fmls v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x58,0x96,0x0f]
// CHECK: fmls v3.4s, v8.4s, v2.s[1] // encoding: [0x03,0x51,0xa2,0x4f]
// CHECK: fmls v3.4s, v8.4s, v22.s[3] // encoding: [0x03,0x59,0xb6,0x4f]
// CHECK: fmls v0.2d, v1.2d, v2.d[1] // encoding: [0x20,0x58,0xc2,0x4f]
// CHECK: fmls v0.2d, v1.2d, v22.d[1] // encoding: [0x20,0x58,0xd6,0x4f]
// SMLAL / SMLAL2 (by element): widening forms, .4h -> .4s and .2s -> .2d for
// SMLAL, .8h -> .4s and .4s -> .2d for SMLAL2. The SMLAL2 cases also reuse v1
// as both the vector source and the element register.
        smlal v0.4s, v1.4h, v2.h[2]
        smlal v0.2d, v1.2s, v2.s[2]
        smlal v0.2d, v1.2s, v22.s[2]
        smlal2 v0.4s, v1.8h, v1.h[2]
        smlal2 v0.2d, v1.4s, v1.s[2]
        smlal2 v0.2d, v1.4s, v22.s[2]

// CHECK: smlal v0.4s, v1.4h, v2.h[2] // encoding: [0x20,0x20,0x62,0x0f]
// CHECK: smlal v0.2d, v1.2s, v2.s[2] // encoding: [0x20,0x28,0x82,0x0f]
// CHECK: smlal v0.2d, v1.2s, v22.s[2] // encoding: [0x20,0x28,0x96,0x0f]
// CHECK: smlal2 v0.4s, v1.8h, v1.h[2] // encoding: [0x20,0x20,0x61,0x4f]
// CHECK: smlal2 v0.2d, v1.4s, v1.s[2] // encoding: [0x20,0x28,0x81,0x4f]
// CHECK: smlal2 v0.2d, v1.4s, v22.s[2] // encoding: [0x20,0x28,0x96,0x4f]
// SMLSL / SMLSL2 (by element): widening forms, .4h -> .4s and .2s -> .2d for
// SMLSL, .8h -> .4s and .4s -> .2d for SMLSL2. Element registers v2, v1 and v22.
        smlsl v0.4s, v1.4h, v2.h[2]
        smlsl v0.2d, v1.2s, v2.s[2]
        smlsl v0.2d, v1.2s, v22.s[2]
        smlsl2 v0.4s, v1.8h, v1.h[2]
        smlsl2 v0.2d, v1.4s, v1.s[2]
        smlsl2 v0.2d, v1.4s, v22.s[2]

// CHECK: smlsl v0.4s, v1.4h, v2.h[2] // encoding: [0x20,0x60,0x62,0x0f]
// CHECK: smlsl v0.2d, v1.2s, v2.s[2] // encoding: [0x20,0x68,0x82,0x0f]
// CHECK: smlsl v0.2d, v1.2s, v22.s[2] // encoding: [0x20,0x68,0x96,0x0f]
// CHECK: smlsl2 v0.4s, v1.8h, v1.h[2] // encoding: [0x20,0x60,0x61,0x4f]
// CHECK: smlsl2 v0.2d, v1.4s, v1.s[2] // encoding: [0x20,0x68,0x81,0x4f]
// CHECK: smlsl2 v0.2d, v1.4s, v22.s[2] // encoding: [0x20,0x68,0x96,0x4f]
// SQDMLAL / SQDMLAL2 (by element): widening forms, .4h -> .4s and .2s -> .2d
// for SQDMLAL, .8h -> .4s and .4s -> .2d for SQDMLAL2. Element registers v2,
// v1 and v22.
        sqdmlal v0.4s, v1.4h, v2.h[2]
        sqdmlal v0.2d, v1.2s, v2.s[2]
        sqdmlal v0.2d, v1.2s, v22.s[2]
        sqdmlal2 v0.4s, v1.8h, v1.h[2]
        sqdmlal2 v0.2d, v1.4s, v1.s[2]
        sqdmlal2 v0.2d, v1.4s, v22.s[2]

// CHECK: sqdmlal v0.4s, v1.4h, v2.h[2] // encoding: [0x20,0x30,0x62,0x0f]
// CHECK: sqdmlal v0.2d, v1.2s, v2.s[2] // encoding: [0x20,0x38,0x82,0x0f]
// CHECK: sqdmlal v0.2d, v1.2s, v22.s[2] // encoding: [0x20,0x38,0x96,0x0f]
// CHECK: sqdmlal2 v0.4s, v1.8h, v1.h[2] // encoding: [0x20,0x30,0x61,0x4f]
// CHECK: sqdmlal2 v0.2d, v1.4s, v1.s[2] // encoding: [0x20,0x38,0x81,0x4f]
// CHECK: sqdmlal2 v0.2d, v1.4s, v22.s[2] // encoding: [0x20,0x38,0x96,0x4f]
// UMLAL / UMLAL2 (by element): widening forms, .4h -> .4s and .2s -> .2d for
// UMLAL, .8h -> .4s and .4s -> .2d for UMLAL2. Element registers v2, v1 and v22.
        umlal v0.4s, v1.4h, v2.h[2]
        umlal v0.2d, v1.2s, v2.s[2]
        umlal v0.2d, v1.2s, v22.s[2]
        umlal2 v0.4s, v1.8h, v1.h[2]
        umlal2 v0.2d, v1.4s, v1.s[2]
        umlal2 v0.2d, v1.4s, v22.s[2]

// CHECK: umlal v0.4s, v1.4h, v2.h[2] // encoding: [0x20,0x20,0x62,0x2f]
// CHECK: umlal v0.2d, v1.2s, v2.s[2] // encoding: [0x20,0x28,0x82,0x2f]
// CHECK: umlal v0.2d, v1.2s, v22.s[2] // encoding: [0x20,0x28,0x96,0x2f]
// CHECK: umlal2 v0.4s, v1.8h, v1.h[2] // encoding: [0x20,0x20,0x61,0x6f]
// CHECK: umlal2 v0.2d, v1.4s, v1.s[2] // encoding: [0x20,0x28,0x81,0x6f]
// CHECK: umlal2 v0.2d, v1.4s, v22.s[2] // encoding: [0x20,0x28,0x96,0x6f]
// UMLSL / UMLSL2 (by element): widening forms, .4h -> .4s and .2s -> .2d for
// UMLSL, .8h -> .4s and .4s -> .2d for UMLSL2. Element registers v2, v1 and v22.
        umlsl v0.4s, v1.4h, v2.h[2]
        umlsl v0.2d, v1.2s, v2.s[2]
        umlsl v0.2d, v1.2s, v22.s[2]
        umlsl2 v0.4s, v1.8h, v1.h[2]
        umlsl2 v0.2d, v1.4s, v1.s[2]
        umlsl2 v0.2d, v1.4s, v22.s[2]

// CHECK: umlsl v0.4s, v1.4h, v2.h[2] // encoding: [0x20,0x60,0x62,0x2f]
// CHECK: umlsl v0.2d, v1.2s, v2.s[2] // encoding: [0x20,0x68,0x82,0x2f]
// CHECK: umlsl v0.2d, v1.2s, v22.s[2] // encoding: [0x20,0x68,0x96,0x2f]
// CHECK: umlsl2 v0.4s, v1.8h, v1.h[2] // encoding: [0x20,0x60,0x61,0x6f]
// CHECK: umlsl2 v0.2d, v1.4s, v1.s[2] // encoding: [0x20,0x68,0x81,0x6f]
// CHECK: umlsl2 v0.2d, v1.4s, v22.s[2] // encoding: [0x20,0x68,0x96,0x6f]
// SQDMLSL / SQDMLSL2 (by element): widening forms, .4h -> .4s and .2s -> .2d
// for SQDMLSL, .8h -> .4s and .4s -> .2d for SQDMLSL2. Element registers v2,
// v1 and v22.
        sqdmlsl v0.4s, v1.4h, v2.h[2]
        sqdmlsl v0.2d, v1.2s, v2.s[2]
        sqdmlsl v0.2d, v1.2s, v22.s[2]
        sqdmlsl2 v0.4s, v1.8h, v1.h[2]
        sqdmlsl2 v0.2d, v1.4s, v1.s[2]
        sqdmlsl2 v0.2d, v1.4s, v22.s[2]

// CHECK: sqdmlsl v0.4s, v1.4h, v2.h[2] // encoding: [0x20,0x70,0x62,0x0f]
// CHECK: sqdmlsl v0.2d, v1.2s, v2.s[2] // encoding: [0x20,0x78,0x82,0x0f]
// CHECK: sqdmlsl v0.2d, v1.2s, v22.s[2] // encoding: [0x20,0x78,0x96,0x0f]
// CHECK: sqdmlsl2 v0.4s, v1.8h, v1.h[2] // encoding: [0x20,0x70,0x61,0x4f]
// CHECK: sqdmlsl2 v0.2d, v1.4s, v1.s[2] // encoding: [0x20,0x78,0x81,0x4f]
// CHECK: sqdmlsl2 v0.2d, v1.4s, v22.s[2] // encoding: [0x20,0x78,0x96,0x4f]
// MUL (by element): .4h, .8h, .2s and .4s arrangements, all using element
// index 2. The .s forms are checked with both v2 and v22 as element register.
        mul v0.4h, v1.4h, v2.h[2]
        mul v0.8h, v1.8h, v2.h[2]
        mul v0.2s, v1.2s, v2.s[2]
        mul v0.2s, v1.2s, v22.s[2]
        mul v0.4s, v1.4s, v2.s[2]
        mul v0.4s, v1.4s, v22.s[2]

// CHECK: mul v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0x80,0x62,0x0f]
// CHECK: mul v0.8h, v1.8h, v2.h[2] // encoding: [0x20,0x80,0x62,0x4f]
// CHECK: mul v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x88,0x82,0x0f]
// CHECK: mul v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x88,0x96,0x0f]
// CHECK: mul v0.4s, v1.4s, v2.s[2] // encoding: [0x20,0x88,0x82,0x4f]
// CHECK: mul v0.4s, v1.4s, v22.s[2] // encoding: [0x20,0x88,0x96,0x4f]
// FMUL (by element): half (.4h/.8h), single (.2s/.4s) and double (.2d)
// precision arrangements. The RUN line enables +fullfp16, which the .h forms
// are assembled under. Element operands come from v2 and v22.
        fmul v0.4h, v1.4h, v2.h[2]
        fmul v0.8h, v1.8h, v2.h[2]
        fmul v0.2s, v1.2s, v2.s[2]
        fmul v0.2s, v1.2s, v22.s[2]
        fmul v0.4s, v1.4s, v2.s[2]
        fmul v0.4s, v1.4s, v22.s[2]
        fmul v0.2d, v1.2d, v2.d[1]
        fmul v0.2d, v1.2d, v22.d[1]

// CHECK: fmul v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0x90,0x22,0x0f]
// CHECK: fmul v0.8h, v1.8h, v2.h[2] // encoding: [0x20,0x90,0x22,0x4f]
// CHECK: fmul v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x98,0x82,0x0f]
// CHECK: fmul v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x98,0x96,0x0f]
// CHECK: fmul v0.4s, v1.4s, v2.s[2] // encoding: [0x20,0x98,0x82,0x4f]
// CHECK: fmul v0.4s, v1.4s, v22.s[2] // encoding: [0x20,0x98,0x96,0x4f]
// CHECK: fmul v0.2d, v1.2d, v2.d[1] // encoding: [0x20,0x98,0xc2,0x4f]
// CHECK: fmul v0.2d, v1.2d, v22.d[1] // encoding: [0x20,0x98,0xd6,0x4f]
// FMULX (by element): half (.4h/.8h), single (.2s/.4s) and double (.2d)
// precision arrangements, using the same operand combinations as the FMUL
// group. The RUN line enables +fullfp16, which the .h forms are assembled under.
        fmulx v0.4h, v1.4h, v2.h[2]
        fmulx v0.8h, v1.8h, v2.h[2]
        fmulx v0.2s, v1.2s, v2.s[2]
        fmulx v0.2s, v1.2s, v22.s[2]
        fmulx v0.4s, v1.4s, v2.s[2]
        fmulx v0.4s, v1.4s, v22.s[2]
        fmulx v0.2d, v1.2d, v2.d[1]
        fmulx v0.2d, v1.2d, v22.d[1]

// CHECK: fmulx v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0x90,0x22,0x2f]
// CHECK: fmulx v0.8h, v1.8h, v2.h[2] // encoding: [0x20,0x90,0x22,0x6f]
// CHECK: fmulx v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x98,0x82,0x2f]
// CHECK: fmulx v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x98,0x96,0x2f]
// CHECK: fmulx v0.4s, v1.4s, v2.s[2] // encoding: [0x20,0x98,0x82,0x6f]
// CHECK: fmulx v0.4s, v1.4s, v22.s[2] // encoding: [0x20,0x98,0x96,0x6f]
// CHECK: fmulx v0.2d, v1.2d, v2.d[1] // encoding: [0x20,0x98,0xc2,0x6f]
// CHECK: fmulx v0.2d, v1.2d, v22.d[1] // encoding: [0x20,0x98,0xd6,0x6f]
// SMULL / SMULL2 (by element): widening forms, .4h -> .4s and .2s -> .2d for
// SMULL, .8h -> .4s and .4s -> .2d for SMULL2. Element registers v2 and v22,
// index 2 throughout.
        smull v0.4s, v1.4h, v2.h[2]
        smull v0.2d, v1.2s, v2.s[2]
        smull v0.2d, v1.2s, v22.s[2]
        smull2 v0.4s, v1.8h, v2.h[2]
        smull2 v0.2d, v1.4s, v2.s[2]
        smull2 v0.2d, v1.4s, v22.s[2]

// CHECK: smull v0.4s, v1.4h, v2.h[2] // encoding: [0x20,0xa0,0x62,0x0f]
// CHECK: smull v0.2d, v1.2s, v2.s[2] // encoding: [0x20,0xa8,0x82,0x0f]
// CHECK: smull v0.2d, v1.2s, v22.s[2] // encoding: [0x20,0xa8,0x96,0x0f]
// CHECK: smull2 v0.4s, v1.8h, v2.h[2] // encoding: [0x20,0xa0,0x62,0x4f]
// CHECK: smull2 v0.2d, v1.4s, v2.s[2] // encoding: [0x20,0xa8,0x82,0x4f]
// CHECK: smull2 v0.2d, v1.4s, v22.s[2] // encoding: [0x20,0xa8,0x96,0x4f]
// UMULL / UMULL2 (by element): widening forms, .4h -> .4s and .2s -> .2d for
// UMULL, .8h -> .4s and .4s -> .2d for UMULL2. Element registers v2 and v22,
// index 2 throughout.
        umull v0.4s, v1.4h, v2.h[2]
        umull v0.2d, v1.2s, v2.s[2]
        umull v0.2d, v1.2s, v22.s[2]
        umull2 v0.4s, v1.8h, v2.h[2]
        umull2 v0.2d, v1.4s, v2.s[2]
        umull2 v0.2d, v1.4s, v22.s[2]

// CHECK: umull v0.4s, v1.4h, v2.h[2] // encoding: [0x20,0xa0,0x62,0x2f]
// CHECK: umull v0.2d, v1.2s, v2.s[2] // encoding: [0x20,0xa8,0x82,0x2f]
// CHECK: umull v0.2d, v1.2s, v22.s[2] // encoding: [0x20,0xa8,0x96,0x2f]
// CHECK: umull2 v0.4s, v1.8h, v2.h[2] // encoding: [0x20,0xa0,0x62,0x6f]
// CHECK: umull2 v0.2d, v1.4s, v2.s[2] // encoding: [0x20,0xa8,0x82,0x6f]
// CHECK: umull2 v0.2d, v1.4s, v22.s[2] // encoding: [0x20,0xa8,0x96,0x6f]
// SQDMULL / SQDMULL2 (by element): widening forms, .4h -> .4s and .2s -> .2d
// for SQDMULL, .8h -> .4s and .4s -> .2d for SQDMULL2. Element registers v2
// and v22, index 2 throughout.
        sqdmull v0.4s, v1.4h, v2.h[2]
        sqdmull v0.2d, v1.2s, v2.s[2]
        sqdmull v0.2d, v1.2s, v22.s[2]
        sqdmull2 v0.4s, v1.8h, v2.h[2]
        sqdmull2 v0.2d, v1.4s, v2.s[2]
        sqdmull2 v0.2d, v1.4s, v22.s[2]

// CHECK: sqdmull v0.4s, v1.4h, v2.h[2] // encoding: [0x20,0xb0,0x62,0x0f]
// CHECK: sqdmull v0.2d, v1.2s, v2.s[2] // encoding: [0x20,0xb8,0x82,0x0f]
// CHECK: sqdmull v0.2d, v1.2s, v22.s[2] // encoding: [0x20,0xb8,0x96,0x0f]
// CHECK: sqdmull2 v0.4s, v1.8h, v2.h[2] // encoding: [0x20,0xb0,0x62,0x4f]
// CHECK: sqdmull2 v0.2d, v1.4s, v2.s[2] // encoding: [0x20,0xb8,0x82,0x4f]
// CHECK: sqdmull2 v0.2d, v1.4s, v22.s[2] // encoding: [0x20,0xb8,0x96,0x4f]
// SQDMULH (by element): .4h, .8h, .2s and .4s arrangements, all using element
// index 2. The .s forms are checked with both v2 and v22 as element register.
        sqdmulh v0.4h, v1.4h, v2.h[2]
        sqdmulh v0.8h, v1.8h, v2.h[2]
        sqdmulh v0.2s, v1.2s, v2.s[2]
        sqdmulh v0.2s, v1.2s, v22.s[2]
        sqdmulh v0.4s, v1.4s, v2.s[2]
        sqdmulh v0.4s, v1.4s, v22.s[2]

// CHECK: sqdmulh v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0xc0,0x62,0x0f]
// CHECK: sqdmulh v0.8h, v1.8h, v2.h[2] // encoding: [0x20,0xc0,0x62,0x4f]
// CHECK: sqdmulh v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0xc8,0x82,0x0f]
// CHECK: sqdmulh v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0xc8,0x96,0x0f]
// CHECK: sqdmulh v0.4s, v1.4s, v2.s[2] // encoding: [0x20,0xc8,0x82,0x4f]
// CHECK: sqdmulh v0.4s, v1.4s, v22.s[2] // encoding: [0x20,0xc8,0x96,0x4f]
// SQRDMULH (by element): .4h, .8h, .2s and .4s arrangements, all using element
// index 2. The .s forms are checked with both v2 and v22 as element register.
        sqrdmulh v0.4h, v1.4h, v2.h[2]
        sqrdmulh v0.8h, v1.8h, v2.h[2]
        sqrdmulh v0.2s, v1.2s, v2.s[2]
        sqrdmulh v0.2s, v1.2s, v22.s[2]
        sqrdmulh v0.4s, v1.4s, v2.s[2]
        sqrdmulh v0.4s, v1.4s, v22.s[2]

// CHECK: sqrdmulh v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0xd0,0x62,0x0f]
// CHECK: sqrdmulh v0.8h, v1.8h, v2.h[2] // encoding: [0x20,0xd0,0x62,0x4f]
// CHECK: sqrdmulh v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0xd8,0x82,0x0f]
// CHECK: sqrdmulh v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0xd8,0x96,0x0f]
// CHECK: sqrdmulh v0.4s, v1.4s, v2.s[2] // encoding: [0x20,0xd8,0x82,0x4f]
// CHECK: sqrdmulh v0.4s, v1.4s, v22.s[2] // encoding: [0x20,0xd8,0x96,0x4f]