1 //===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file describes the ARM NEON instruction set.
11 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
15 // NEON-specific Operands.
16 //===----------------------------------------------------------------------===//
// i32 operand whose assembly text is produced by printNEONModImmOperand.
17 def nModImm : Operand<i32> {
18 let PrintMethod = "printNEONModImmOperand";
// Splat-immediate operands nImmSplatI8 / I16 / I32. Each one pairs an
// AsmOperandClass (Name "NEONi<N>splat") with an i32 operand that is matched by
// that class and printed with printNEONModImmOperand.
21 def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
22 def nImmSplatI8 : Operand<i32> {
23 let PrintMethod = "printNEONModImmOperand";
24 let ParserMatchClass = nImmSplatI8AsmOperand;
26 def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
27 def nImmSplatI16 : Operand<i32> {
28 let PrintMethod = "printNEONModImmOperand";
29 let ParserMatchClass = nImmSplatI16AsmOperand;
31 def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
32 def nImmSplatI32 : Operand<i32> {
33 let PrintMethod = "printNEONModImmOperand";
34 let ParserMatchClass = nImmSplatI32AsmOperand;
// "Not" splat-immediate operands for the I16 and I32 forms. Each i32 operand is
// matched by its own AsmOperandClass ("NEONi16splatNot" / "NEONi32splatNot").
// Only ParserMatchClass is assigned in these records; no PrintMethod is set
// alongside it, unlike the nImmSplatI* operands.
36 def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; }
37 def nImmSplatNotI16 : Operand<i32> {
38 let ParserMatchClass = nImmSplatNotI16AsmOperand;
40 def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; }
41 def nImmSplatNotI32 : Operand<i32> {
42 let ParserMatchClass = nImmSplatNotI32AsmOperand;
// i32 operand matched by the "NEONi32vmov" AsmOperandClass and printed with
// printNEONModImmOperand.
44 def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
45 def nImmVMOVI32 : Operand<i32> {
46 let PrintMethod = "printNEONModImmOperand";
47 let ParserMatchClass = nImmVMOVI32AsmOperand;
// Parameterised asm-operand class for "replicated" VMOV immediates.
// The Name, PredicateMethod and RenderMethod strings are assembled from the
// Size fields of the two ValueType parameters. With sizes 8 (From) and
// 16 (To) this yields:
//   Name            = "NEONi16vmovi8Replicate"
//   PredicateMethod = "isNEONmovReplicate<8, 16>"
//   RenderMethod    = "addNEONvmovi8ReplicateOperands"
// Note that RenderMethod depends on From.Size only.
50 class nImmVMOVIAsmOperandReplicate<ValueType From, ValueType To>
52 let Name = "NEONi" # To.Size # "vmovi" # From.Size # "Replicate";
53 let PredicateMethod = "isNEONmovReplicate<" # From.Size # ", " # To.Size # ">";
54 let RenderMethod = "addNEONvmovi" # From.Size # "ReplicateOperands";
// Same construction for the "inv" flavour: the strings use "invi" /
// "isNEONinvReplicate" / "addNEONinvi" in place of the "vmov" spellings above.
57 class nImmVINVIAsmOperandReplicate<ValueType From, ValueType To>
59 let Name = "NEONi" # To.Size # "invi" # From.Size # "Replicate";
60 let PredicateMethod = "isNEONinvReplicate<" # From.Size # ", " # To.Size # ">";
61 let RenderMethod = "addNEONinvi" # From.Size # "ReplicateOperands";
// i32 operand classes for the replicated immediates. Each one is printed with
// printNEONModImmOperand and matched by the asm-operand class instantiated
// with the same <From, To> pair.
64 class nImmVMOVIReplicate<ValueType From, ValueType To> : Operand<i32> {
65 let PrintMethod = "printNEONModImmOperand";
66 let ParserMatchClass = nImmVMOVIAsmOperandReplicate<From, To>;
69 class nImmVINVIReplicate<ValueType From, ValueType To> : Operand<i32> {
70 let PrintMethod = "printNEONModImmOperand";
71 let ParserMatchClass = nImmVINVIAsmOperandReplicate<From, To>;
// i32 operand matched by the "NEONi32vmovNeg" AsmOperandClass and printed with
// printNEONModImmOperand.
74 def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
75 def nImmVMOVI32Neg : Operand<i32> {
76 let PrintMethod = "printNEONModImmOperand";
77 let ParserMatchClass = nImmVMOVI32NegAsmOperand;
// i32 operand that reuses FPImmOperand as its parser match class and is
// printed with printFPImmOperand (not printNEONModImmOperand like its
// neighbours).
79 def nImmVMOVF32 : Operand<i32> {
80 let PrintMethod = "printFPImmOperand";
81 let ParserMatchClass = FPImmOperand;
// i32 operand matched by the "NEONi64splat" AsmOperandClass and printed with
// printNEONModImmOperand.
83 def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
84 def nImmSplatI64 : Operand<i32> {
85 let PrintMethod = "printNEONModImmOperand";
86 let ParserMatchClass = nImmSplatI64AsmOperand;
// Lane-index operands. Each VectorIndex<N> is both an i32 Operand and an
// ImmLeaf; the ImmLeaf predicate bounds the index after casting Imm to
// uint64_t:
//   VectorIndex8  : Imm < 8
//   VectorIndex16 : Imm < 4
//   VectorIndex32 : Imm < 2
//   VectorIndex64 : Imm < 1   (i.e. only 0 is accepted)
// All four are printed with printVectorIndex and expose one i32imm machine
// operand via MIOperandInfo.
89 def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; }
90 def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
91 def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
92 def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; }
93 def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
94 return ((uint64_t)Imm) < 8;
96 let ParserMatchClass = VectorIndex8Operand;
97 let PrintMethod = "printVectorIndex";
98 let MIOperandInfo = (ops i32imm);
100 def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
101 return ((uint64_t)Imm) < 4;
103 let ParserMatchClass = VectorIndex16Operand;
104 let PrintMethod = "printVectorIndex";
105 let MIOperandInfo = (ops i32imm);
107 def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
108 return ((uint64_t)Imm) < 2;
110 let ParserMatchClass = VectorIndex32Operand;
111 let PrintMethod = "printVectorIndex";
112 let MIOperandInfo = (ops i32imm);
114 def VectorIndex64 : Operand<i32>, ImmLeaf<i32, [{
115 return ((uint64_t)Imm) < 1;
117 let ParserMatchClass = VectorIndex64Operand;
118 let PrintMethod = "printVectorIndex";
119 let MIOperandInfo = (ops i32imm);
// Register-list operands. Every AsmOperandClass in this group is parsed by
// parseVectorList and rendered by addVecListOperands; the RegisterOperand that
// follows it names the register class and the print method for that list form.
122 // Register list of one D register.
123 def VecListOneDAsmOperand : AsmOperandClass {
124 let Name = "VecListOneD";
125 let ParserMethod = "parseVectorList";
126 let RenderMethod = "addVecListOperands";
128 def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
129 let ParserMatchClass = VecListOneDAsmOperand;
131 // Register list of two sequential D registers.
132 def VecListDPairAsmOperand : AsmOperandClass {
133 let Name = "VecListDPair";
134 let ParserMethod = "parseVectorList";
135 let RenderMethod = "addVecListOperands";
137 def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
138 let ParserMatchClass = VecListDPairAsmOperand;
140 // Register list of three sequential D registers.
141 def VecListThreeDAsmOperand : AsmOperandClass {
142 let Name = "VecListThreeD";
143 let ParserMethod = "parseVectorList";
144 let RenderMethod = "addVecListOperands";
// The three- and four-register forms use the plain DPR register class.
146 def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
147 let ParserMatchClass = VecListThreeDAsmOperand;
149 // Register list of four sequential D registers.
150 def VecListFourDAsmOperand : AsmOperandClass {
151 let Name = "VecListFourD";
152 let ParserMethod = "parseVectorList";
153 let RenderMethod = "addVecListOperands";
155 def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
156 let ParserMatchClass = VecListFourDAsmOperand;
158 // Register list of two D registers spaced by 2 (two sequential Q registers).
159 def VecListDPairSpacedAsmOperand : AsmOperandClass {
160 let Name = "VecListDPairSpaced";
161 let ParserMethod = "parseVectorList";
162 let RenderMethod = "addVecListOperands";
// NOTE(review): this spaced operand is declared over DPair, whereas
// VecListDPairSpacedAllLanes is declared over DPairSpc - confirm the
// register class here is intentional.
164 def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
165 let ParserMatchClass = VecListDPairSpacedAsmOperand;
167 // Register list of three D registers spaced by 2 (three Q registers).
168 def VecListThreeQAsmOperand : AsmOperandClass {
169 let Name = "VecListThreeQ";
170 let ParserMethod = "parseVectorList";
171 let RenderMethod = "addVecListOperands";
173 def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
174 let ParserMatchClass = VecListThreeQAsmOperand;
176 // Register list of four D registers spaced by 2 (four Q registers).
// Asm operand class "VecListFourQ": parsed by parseVectorList, rendered by
// addVecListOperands.
177 def VecListFourQAsmOperand : AsmOperandClass {
178 let Name = "VecListFourQ";
179 let ParserMethod = "parseVectorList";
180 let RenderMethod = "addVecListOperands";
// Operand form: DPR register class, printed with printVectorListFourSpaced.
182 def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
183 let ParserMatchClass = VecListFourQAsmOperand;
// "All lanes" register-list operands. As with the plain lists, each asm
// operand class is parsed by parseVectorList and rendered by
// addVecListOperands; only the Name and the print method differ.
186 // Register list of one D register, with "all lanes" subscripting.
187 def VecListOneDAllLanesAsmOperand : AsmOperandClass {
188 let Name = "VecListOneDAllLanes";
189 let ParserMethod = "parseVectorList";
190 let RenderMethod = "addVecListOperands";
192 def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
193 let ParserMatchClass = VecListOneDAllLanesAsmOperand;
195 // Register list of two D registers, with "all lanes" subscripting.
196 def VecListDPairAllLanesAsmOperand : AsmOperandClass {
197 let Name = "VecListDPairAllLanes";
198 let ParserMethod = "parseVectorList";
199 let RenderMethod = "addVecListOperands";
201 def VecListDPairAllLanes : RegisterOperand<DPair,
202 "printVectorListTwoAllLanes"> {
203 let ParserMatchClass = VecListDPairAllLanesAsmOperand;
205 // Register list of two D registers spaced by 2 (two sequential Q registers), with "all lanes" subscripting.
// Asm operand class "VecListDPairSpacedAllLanes": parsed by parseVectorList,
// rendered by addVecListOperands.
206 def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
207 let Name = "VecListDPairSpacedAllLanes";
208 let ParserMethod = "parseVectorList";
209 let RenderMethod = "addVecListOperands";
// Operand form: DPairSpc register class, printed with
// printVectorListTwoSpacedAllLanes.
211 def VecListDPairSpacedAllLanes : RegisterOperand<DPairSpc,
212 "printVectorListTwoSpacedAllLanes"> {
213 let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
// Three- and four-register "all lanes" lists. Every asm operand class here is
// parsed by parseVectorList and rendered by addVecListOperands, and every
// RegisterOperand is declared over the DPR register class; the print method
// selects the sequential or spaced, three- or four-register spelling.
215 // Register list of three D registers, with "all lanes" subscripting.
216 def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
217 let Name = "VecListThreeDAllLanes";
218 let ParserMethod = "parseVectorList";
219 let RenderMethod = "addVecListOperands";
221 def VecListThreeDAllLanes : RegisterOperand<DPR,
222 "printVectorListThreeAllLanes"> {
223 let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
225 // Register list of three D registers spaced by 2 (three sequential Q regs).
226 def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
227 let Name = "VecListThreeQAllLanes";
228 let ParserMethod = "parseVectorList";
229 let RenderMethod = "addVecListOperands";
231 def VecListThreeQAllLanes : RegisterOperand<DPR,
232 "printVectorListThreeSpacedAllLanes"> {
233 let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
235 // Register list of four D registers, with "all lanes" subscripting.
236 def VecListFourDAllLanesAsmOperand : AsmOperandClass {
237 let Name = "VecListFourDAllLanes";
238 let ParserMethod = "parseVectorList";
239 let RenderMethod = "addVecListOperands";
241 def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
242 let ParserMatchClass = VecListFourDAllLanesAsmOperand;
244 // Register list of four D registers spaced by 2 (four sequential Q regs).
245 def VecListFourQAllLanesAsmOperand : AsmOperandClass {
246 let Name = "VecListFourQAllLanes";
247 let ParserMethod = "parseVectorList";
248 let RenderMethod = "addVecListOperands";
250 def VecListFourQAllLanes : RegisterOperand<DPR,
251 "printVectorListFourSpacedAllLanes"> {
252 let ParserMatchClass = VecListFourQAllLanesAsmOperand;
// Lane-indexed single-register lists. Each asm operand class is parsed by
// parseVectorList and rendered by addVecListIndexedOperands (not
// addVecListOperands). The matching operand is an i32 Operand whose
// MIOperandInfo is the pair (DPR:$Vd, i32imm:$idx): a register plus a lane
// index.
256 // Register list of one D register, with byte lane subscripting.
257 def VecListOneDByteIndexAsmOperand : AsmOperandClass {
258 let Name = "VecListOneDByteIndexed";
259 let ParserMethod = "parseVectorList";
260 let RenderMethod = "addVecListIndexedOperands";
262 def VecListOneDByteIndexed : Operand<i32> {
263 let ParserMatchClass = VecListOneDByteIndexAsmOperand;
264 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
266 // ...with half-word lane subscripting.
267 def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
268 let Name = "VecListOneDHWordIndexed";
269 let ParserMethod = "parseVectorList";
270 let RenderMethod = "addVecListIndexedOperands";
272 def VecListOneDHWordIndexed : Operand<i32> {
273 let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
274 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
276 // ...with word lane subscripting.
277 def VecListOneDWordIndexAsmOperand : AsmOperandClass {
278 let Name = "VecListOneDWordIndexed";
279 let ParserMethod = "parseVectorList";
280 let RenderMethod = "addVecListIndexedOperands";
282 def VecListOneDWordIndexed : Operand<i32> {
283 let ParserMatchClass = VecListOneDWordIndexAsmOperand;
284 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
// Lane-indexed two-register lists (D-register and Q-register spellings).
// Parsed by parseVectorList, rendered by addVecListIndexedOperands. Although
// the list names two registers, MIOperandInfo still carries a single DPR:$Vd
// plus the i32imm:$idx lane index. There is no byte-indexed Q form in this
// group.
287 // Register list of two D registers with byte lane subscripting.
288 def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
289 let Name = "VecListTwoDByteIndexed";
290 let ParserMethod = "parseVectorList";
291 let RenderMethod = "addVecListIndexedOperands";
293 def VecListTwoDByteIndexed : Operand<i32> {
294 let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
295 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
297 // ...with half-word lane subscripting.
298 def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
299 let Name = "VecListTwoDHWordIndexed";
300 let ParserMethod = "parseVectorList";
301 let RenderMethod = "addVecListIndexedOperands";
303 def VecListTwoDHWordIndexed : Operand<i32> {
304 let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
305 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
307 // ...with word lane subscripting.
308 def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
309 let Name = "VecListTwoDWordIndexed";
310 let ParserMethod = "parseVectorList";
311 let RenderMethod = "addVecListIndexedOperands";
313 def VecListTwoDWordIndexed : Operand<i32> {
314 let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
315 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
317 // Register list of two Q registers with half-word lane subscripting.
318 def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
319 let Name = "VecListTwoQHWordIndexed";
320 let ParserMethod = "parseVectorList";
321 let RenderMethod = "addVecListIndexedOperands";
323 def VecListTwoQHWordIndexed : Operand<i32> {
324 let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
325 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
327 // ...with word lane subscripting.
328 def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
329 let Name = "VecListTwoQWordIndexed";
330 let ParserMethod = "parseVectorList";
331 let RenderMethod = "addVecListIndexedOperands";
333 def VecListTwoQWordIndexed : Operand<i32> {
334 let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
335 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
// Lane-indexed three-register lists (D-register and Q-register spellings).
// Parsed by parseVectorList, rendered by addVecListIndexedOperands. As in the
// other indexed groups, MIOperandInfo is (DPR:$Vd, i32imm:$idx) regardless of
// how many registers the list names. There is no byte-indexed Q form in this
// group.
339 // Register list of three D registers with byte lane subscripting.
340 def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
341 let Name = "VecListThreeDByteIndexed";
342 let ParserMethod = "parseVectorList";
343 let RenderMethod = "addVecListIndexedOperands";
345 def VecListThreeDByteIndexed : Operand<i32> {
346 let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
347 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
349 // ...with half-word lane subscripting.
350 def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
351 let Name = "VecListThreeDHWordIndexed";
352 let ParserMethod = "parseVectorList";
353 let RenderMethod = "addVecListIndexedOperands";
355 def VecListThreeDHWordIndexed : Operand<i32> {
356 let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
357 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
359 // ...with word lane subscripting.
360 def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
361 let Name = "VecListThreeDWordIndexed";
362 let ParserMethod = "parseVectorList";
363 let RenderMethod = "addVecListIndexedOperands";
365 def VecListThreeDWordIndexed : Operand<i32> {
366 let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
367 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
369 // Register list of three Q registers with half-word lane subscripting.
370 def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
371 let Name = "VecListThreeQHWordIndexed";
372 let ParserMethod = "parseVectorList";
373 let RenderMethod = "addVecListIndexedOperands";
375 def VecListThreeQHWordIndexed : Operand<i32> {
376 let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
377 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
379 // ...with word lane subscripting.
380 def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
381 let Name = "VecListThreeQWordIndexed";
382 let ParserMethod = "parseVectorList";
383 let RenderMethod = "addVecListIndexedOperands";
385 def VecListThreeQWordIndexed : Operand<i32> {
386 let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
387 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
// Lane-indexed four-register lists (D-register and Q-register spellings).
// Parsed by parseVectorList, rendered by addVecListIndexedOperands. As in the
// other indexed groups, MIOperandInfo is (DPR:$Vd, i32imm:$idx) regardless of
// how many registers the list names. There is no byte-indexed Q form in this
// group.
390 // Register list of four D registers with byte lane subscripting.
391 def VecListFourDByteIndexAsmOperand : AsmOperandClass {
392 let Name = "VecListFourDByteIndexed";
393 let ParserMethod = "parseVectorList";
394 let RenderMethod = "addVecListIndexedOperands";
396 def VecListFourDByteIndexed : Operand<i32> {
397 let ParserMatchClass = VecListFourDByteIndexAsmOperand;
398 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
400 // ...with half-word lane subscripting.
401 def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
402 let Name = "VecListFourDHWordIndexed";
403 let ParserMethod = "parseVectorList";
404 let RenderMethod = "addVecListIndexedOperands";
406 def VecListFourDHWordIndexed : Operand<i32> {
407 let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
408 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
410 // ...with word lane subscripting.
411 def VecListFourDWordIndexAsmOperand : AsmOperandClass {
412 let Name = "VecListFourDWordIndexed";
413 let ParserMethod = "parseVectorList";
414 let RenderMethod = "addVecListIndexedOperands";
416 def VecListFourDWordIndexed : Operand<i32> {
417 let ParserMatchClass = VecListFourDWordIndexAsmOperand;
418 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
420 // Register list of four Q registers with half-word lane subscripting.
421 def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
422 let Name = "VecListFourQHWordIndexed";
423 let ParserMethod = "parseVectorList";
424 let RenderMethod = "addVecListIndexedOperands";
426 def VecListFourQHWordIndexed : Operand<i32> {
427 let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
428 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
430 // ...with word lane subscripting.
431 def VecListFourQWordIndexAsmOperand : AsmOperandClass {
432 let Name = "VecListFourQWordIndexed";
433 let ParserMethod = "parseVectorList";
434 let RenderMethod = "addVecListIndexedOperands";
436 def VecListFourQWordIndexed : Operand<i32> {
437 let ParserMatchClass = VecListFourQWordIndexAsmOperand;
438 let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
// Alignment-qualified load/store pattern fragments. Each fragment wraps a
// plain `load` or `store` and filters on the node's getAlignment() value:
//   dword_aligned*    : alignment >= 8
//   word_aligned*     : alignment == 4
//   hword_aligned*    : alignment == 2
//   byte_aligned*     : alignment == 1
//   non_word_aligned* : alignment <  4
// Only the dword fragments use a lower bound; word/hword/byte match the exact
// value, so an access with alignment 8 matches dword_* but not word_*.
441 def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
442 return cast<LoadSDNode>(N)->getAlignment() >= 8;
444 def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
445 (store node:$val, node:$ptr), [{
446 return cast<StoreSDNode>(N)->getAlignment() >= 8;
448 def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
449 return cast<LoadSDNode>(N)->getAlignment() == 4;
451 def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
452 (store node:$val, node:$ptr), [{
453 return cast<StoreSDNode>(N)->getAlignment() == 4;
455 def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
456 return cast<LoadSDNode>(N)->getAlignment() == 2;
458 def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
459 (store node:$val, node:$ptr), [{
460 return cast<StoreSDNode>(N)->getAlignment() == 2;
462 def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
463 return cast<LoadSDNode>(N)->getAlignment() == 1;
465 def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
466 (store node:$val, node:$ptr), [{
467 return cast<StoreSDNode>(N)->getAlignment() == 1;
469 def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
470 return cast<LoadSDNode>(N)->getAlignment() < 4;
472 def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
473 (store node:$val, node:$ptr), [{
474 return cast<StoreSDNode>(N)->getAlignment() < 4;
477 //===----------------------------------------------------------------------===//
478 // NEON-specific DAG Nodes.
479 //===----------------------------------------------------------------------===//
// Type profiles for the vector compare nodes.
//   SDTARMVCMP  : 1 result, 2 operands; the result is an integer type and the
//                 two operands have the same type as each other.
//   SDTARMVCMPZ : 1 result, 1 operand; no type constraints.
481 def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
482 def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;
// Compare nodes. The names ending in "z" use the one-operand SDTARMVCMPZ
// profile; all others, including NEONvtst, use the two-operand SDTARMVCMP.
484 def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
485 def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
486 def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
487 def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
488 def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
489 def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
490 def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
491 def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
492 def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
493 def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
494 def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;
496 // Types for vector shift by immediates. The "SHX" version is for long and
497 // narrow operations where the source and destination vectors have different
498 // types. The "SHINS" version is for shift and insert operations.
// SDTARMVSH and SDTARMVSHX each have 1 result and 2 operands; SDTARMVSHINS has
// 1 result and 3 operands, with operands 1 and 2 typed like the result and
// operand 3 fixed to i32.
499 def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
501 def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
503 def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
504 SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
// Plain shifts use SDTARMVSH; the narrowing VSHRN uses SDTARMVSHX.
506 def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>;
507 def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
508 def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
509 def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;
// VRSHR* nodes: same profile split as above.
511 def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
512 def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
513 def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;
// VQSHL* nodes use SDTARMVSH; VQSHRN* nodes use SDTARMVSHX.
515 def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
516 def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
517 def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
518 def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
519 def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
520 def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;
// VQRSHRN* nodes all use SDTARMVSHX.
522 def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
523 def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
524 def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;
// Shift-and-insert nodes use the three-operand SDTARMVSHINS profile.
526 def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
527 def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;
// Lane extraction: 1 result fixed to i32, 2 operands, the first of which is an
// integer type. Shared by the unsigned (u) and signed (s) get-lane nodes.
529 def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
531 def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
532 def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
// Vector move-immediate: vector result, single i32 operand. The same profile
// is used for VMOVIMM, VMVNIMM and VMOVFPIMM.
534 def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
535 def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
536 def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
537 def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;
// OR/BIC with immediate: vector result, 2 operands, operand 1 typed like the
// result.
539 def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
541 def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
542 def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
// VBSL: vector result with 3 operands, declared with an inline type profile.
544 def NEONvbsl : SDNode<"ARMISD::VBSL",
545 SDTypeProfile<1, 3, [SDTCisVec<0>,
548 SDTCisSameAs<0, 3>]>>;
// VDUP: vector result, 1 operand with no constraint on the operand type.
550 def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
552 // VDUPLANE can produce a quad-register result from a double-register source,
553 // so the result is not constrained to match the source.
554 def NEONvduplane : SDNode<"ARMISD::VDUPLANE",
555 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
// VEXT: vector result, 3 operands; operands 1 and 2 are typed like the result
// and operand 3 is an i32.
558 def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
559 SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
560 def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
// Single-input shuffles (VREV64/32/16): result and operand share one vector
// type.
562 def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
563 def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
564 def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
565 def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
// Two-input shuffles (VZIP/VUZP/VTRN): this profile declares 2 results and
// 2 operands.
567 def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
569 SDTCisSameAs<0, 3>]>;
570 def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
571 def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
572 def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
// VMULL: integer result, two integer operands of the same type as each other.
// The result type is not tied to the operand type.
574 def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
575 SDTCisSameAs<1, 2>]>;
576 def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
577 def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
// Table lookups: the result and the operands constrained here are all v8i8.
// VTBL1 has 2 operands, VTBL2 has 3.
579 def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
581 def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
582 SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
583 def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
584 def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;
// Pattern leaves that recognise all-zeros / all-ones vectors expressed as a
// NEONvmovImm of a target i32 constant. The predicate passes the constant
// operand to ARM_AM::decodeNEONModImm and then tests the returned element
// value together with EltBits (presumably filled in by that call - confirm
// against ARM_AM):
//   all zeros : EltBits == 32 and EltVal == 0
//   all ones  : EltBits == 8  and EltVal == 0xff
// Other encodings of the same vector value do not match these leaves.
587 def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
588 ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
589 unsigned EltBits = 0;
590 uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
591 return (EltBits == 32 && EltVal == 0);
594 def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
595 ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
596 unsigned EltBits = 0;
597 uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
598 return (EltBits == 8 && EltVal == 0xff);
601 //===----------------------------------------------------------------------===//
602 // NEON load / store instructions
603 //===----------------------------------------------------------------------===//
605 // Use VLDM to load a Q register as a D register pair.
606 // This is a pseudo instruction that is expanded to VLDMD after reg alloc.
608 : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
610 [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>;
612 // Use VSTM to store a Q register as a D register pair.
613 // This is a pseudo instruction that is expanded to VSTMD after reg alloc.
615 : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
617 [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;
619 // Classes for VLD* pseudo-instructions with multi-register operands.
620 // These are expanded to real instructions after register allocation.
// Naming scheme visible below:
//   VLDQ*    - destination in QPR
//   VLDQQ*   - destination in QQPR
//   VLDQQQQ* - destination in QQQQPR, with an extra QQQQPR:$src input
//   *WB*     - additionally defines GPR:$wb (address writeback)
//   WBfixed    takes only the address; WBregister adds an rGPR:$offset;
//   the plain WB form takes an am6offset:$offset.
// Each class takes the instruction itinerary as its only parameter.
621 class VLDQPseudo<InstrItinClass itin>
622 : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
623 class VLDQWBPseudo<InstrItinClass itin>
624 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
625 (ins addrmode6:$addr, am6offset:$offset), itin,
627 class VLDQWBfixedPseudo<InstrItinClass itin>
628 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
629 (ins addrmode6:$addr), itin,
631 class VLDQWBregisterPseudo<InstrItinClass itin>
632 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
633 (ins addrmode6:$addr, rGPR:$offset), itin,
636 class VLDQQPseudo<InstrItinClass itin>
637 : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
638 class VLDQQWBPseudo<InstrItinClass itin>
639 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
640 (ins addrmode6:$addr, am6offset:$offset), itin,
642 class VLDQQWBfixedPseudo<InstrItinClass itin>
643 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
644 (ins addrmode6:$addr), itin,
646 class VLDQQWBregisterPseudo<InstrItinClass itin>
647 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
648 (ins addrmode6:$addr, rGPR:$offset), itin,
652 class VLDQQQQPseudo<InstrItinClass itin>
653 : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
655 class VLDQQQQWBPseudo<InstrItinClass itin>
656 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
657 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
// Constraint string: the writeback result is tied to the address register and
// the $src input is tied to the $dst output.
658 "$addr.addr = $wb, $src = $dst">;
660 let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
662 // VLD1 : Vector Load (multiple single elements)
// Class parameters (shared by VLD1D and VLD1Q):
//   op7_4    - 4-bit field forwarded to NLdSt as its fourth argument.
//   Dt       - string passed to NLdSt after the "vld1" mnemonic ("8", "16",
//              "32" or "64" in the defs below).
//   AddrMode - operand class used for the $Rn address input.
// VLD1D loads into a VecListOneD with itinerary IIC_VLD1 / WriteVLD1.
663 class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
664 : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
665 (ins AddrMode:$Rn), IIC_VLD1,
666 "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> {
669 let DecoderMethod = "DecodeVLDST1Instruction";
// VLD1Q loads into a VecListDPair with itinerary IIC_VLD1x2 / WriteVLD2 and
// copies bits 5-4 of $Rn into instruction bits 5-4.
671 class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
672 : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
673 (ins AddrMode:$Rn), IIC_VLD1x2,
674 "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> {
676 let Inst{5-4} = Rn{5-4};
677 let DecoderMethod = "DecodeVLDST1Instruction";
// One-register forms: the top two bits of op7_4 vary with the element size,
// the third bit is 0 and the last bit is left unset (?). All use
// addrmode6align64.
680 def VLD1d8 : VLD1D<{0,0,0,?}, "8", addrmode6align64>;
681 def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
682 def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
683 def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>;
// Two-register forms: the low two bits of op7_4 are both left unset (?), and
// the address operand is addrmode6align64or128.
685 def VLD1q8 : VLD1Q<{0,0,?,?}, "8", addrmode6align64or128>;
686 def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
687 def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
688 def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;
690 // ...with address register writeback:
// Each multiclass produces two records per instantiation:
//   _fixed    - asm string "$Vd, $Rn!"; Rm is hard-wired to 0b1101.
//   _register - asm string "$Vd, $Rn, $Rm"; takes an extra rGPR:$Rm input.
// Both add a GPR:$wb output tied to the address via "$Rn.addr = $wb".
691 multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
692 def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
693 (ins AddrMode:$Rn), IIC_VLD1u,
694 "vld1", Dt, "$Vd, $Rn!",
695 "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
696 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
698 let DecoderMethod = "DecodeVLDST1Instruction";
700 def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
701 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
702 "vld1", Dt, "$Vd, $Rn, $Rm",
703 "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
705 let DecoderMethod = "DecodeVLDST1Instruction";
// Two-register writeback variant: VecListDPair destination, IIC_VLD1x2u
// itinerary, WriteVLD2 scheduling, and bits 5-4 of $Rn copied into Inst{5-4}.
708 multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
709 def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
710 (ins AddrMode:$Rn), IIC_VLD1x2u,
711 "vld1", Dt, "$Vd, $Rn!",
712 "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
713 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
714 let Inst{5-4} = Rn{5-4};
715 let DecoderMethod = "DecodeVLDST1Instruction";
717 def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
718 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
719 "vld1", Dt, "$Vd, $Rn, $Rm",
720 "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
721 let Inst{5-4} = Rn{5-4};
722 let DecoderMethod = "DecodeVLDST1Instruction";
// Instantiations: each defm yields <name>_fixed and <name>_register.
726 defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8", addrmode6align64>;
727 defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
728 defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
729 defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
730 defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8", addrmode6align64or128>;
731 defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
732 defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
733 defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
735 // ...with 3 registers
// VLD1D3 loads into a VecListThreeD with itinerary IIC_VLD1x3 / WriteVLD3.
736 class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
737 : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
738 (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
739 "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> {
742 let DecoderMethod = "DecodeVLDST1Instruction";
// Writeback forms (_fixed: "$Rn!" with Rm = 0b1101; _register: extra
// rGPR:$Rm input).
// NOTE(review): both records use the IIC_VLD1x2u itinerary even though the
// non-writeback class above uses IIC_VLD1x3 and the Sched list is WriteVLD3 -
// confirm this itinerary is intended for the three-register form.
744 multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
745 def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
746 (ins AddrMode:$Rn), IIC_VLD1x2u,
747 "vld1", Dt, "$Vd, $Rn!",
748 "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
749 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
751 let DecoderMethod = "DecodeVLDST1Instruction";
753 def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
754 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
755 "vld1", Dt, "$Vd, $Rn, $Rm",
756 "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
758 let DecoderMethod = "DecodeVLDST1Instruction";
// Real three-register instructions, all with addrmode6align64.
762 def VLD1d8T : VLD1D3<{0,0,0,?}, "8", addrmode6align64>;
763 def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
764 def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
765 def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;
767 defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8", addrmode6align64>;
768 defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
769 defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
770 defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;
// Pseudos for the three-register forms, built on the VLDQQ* pseudo classes.
// Only the 64-bit element size has writeback pseudos here.
772 def VLD1d8TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
773 def VLD1d16TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
774 def VLD1d32TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
775 def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
776 def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
777 def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
// Q-register pseudos come in High / Low_UPD pairs per element size: the High
// record uses VLDQQQQPseudo and the Low_UPD record uses the writeback class
// VLDQQQQWBPseudo.
779 def VLD1q8HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
780 def VLD1q8LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
781 def VLD1q16HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
782 def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
783 def VLD1q32HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
784 def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
785 def VLD1q64HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
786 def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
788 // ...with 4 registers
// Four-register VLD1 without writeback.
//   op7_4    - forwarded to NLdSt as the op7_4 argument.
//   Dt       - data-type suffix string used in the asm ("vld1" # Dt).
//   AddrMode - operand class of the address operand $Rn.
// Produces VecListFourD:$Vd, copies Rn{5-4} into encoding bits 5-4 and is
// decoded by DecodeVLDST1Instruction.
789 class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
790 : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
791 (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt,
792 "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> {
794 let Inst{5-4} = Rn{5-4};
795 let DecoderMethod = "DecodeVLDST1Instruction";
// Four-register VLD1 with address writeback. Expands to two records:
//   _fixed    - input is AddrMode:$Rn only, asm "$Vd, $Rn!", Rm fixed to 0b1101.
//   _register - adds an rGPR:$Rm input, asm "$Vd, $Rn, $Rm".
// Both tie $Rn.addr to the GPR:$wb output, copy Rn{5-4} into Inst{5-4} and are
// decoded by DecodeVLDST1Instruction.
// NOTE(review): both records use IIC_VLD1x2u whereas the non-writeback VLD1D4
// class uses IIC_VLD1x4 -- confirm this itinerary is intended.
797 multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
798 def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
799 (ins AddrMode:$Rn), IIC_VLD1x2u,
800 "vld1", Dt, "$Vd, $Rn!",
801 "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
802 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
803 let Inst{5-4} = Rn{5-4};
804 let DecoderMethod = "DecodeVLDST1Instruction";
806 def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
807 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
808 "vld1", Dt, "$Vd, $Rn, $Rm",
809 "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
810 let Inst{5-4} = Rn{5-4};
811 let DecoderMethod = "DecodeVLDST1Instruction";
// Four-register VLD1 instantiations for 8/16/32/64-bit element sizes. Every
// variant uses addrmode6align64or128or256 and leaves the two low bits of op7_4
// unset (?); the VLD1D4 class fills Inst{5-4} from Rn{5-4}.
815 def VLD1d8Q : VLD1D4<{0,0,?,?}, "8", addrmode6align64or128or256>;
816 def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
817 def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
818 def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;
// Writeback forms: each defm expands VLD1D4WB into _fixed and _register records.
820 defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>;
821 defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
822 defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
823 defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
// Pseudo-instructions built on the VLDQQ* pseudo classes. All carry the
// IIC_VLD1x4 itinerary and Sched<[WriteVLD4]>; only the 64-bit form has
// writeback (_fixed / _register) pseudos here.
825 def VLD1d8QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
826 def VLD1d16QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
827 def VLD1d32QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
828 def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
829 def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
830 def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
// Pseudos built on the VLDQQQQ* classes: for each element size a "Low" pseudo
// with writeback (_UPD) and a "High" pseudo without.
832 def VLD1q8LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
833 def VLD1q8HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
834 def VLD1q16LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
835 def VLD1q16HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
836 def VLD1q32LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
837 def VLD1q32HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
838 def VLD1q64LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
839 def VLD1q64HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
841 // VLD2 : Vector Load (multiple 2-element structures)
// Base class for VLD2 without writeback.
//   op11_8, op7_4 - forwarded to NLdSt.
//   Dt            - data-type suffix string used in the asm.
//   VdTy          - register-list operand class of the $Vd output.
//   itin          - instruction itinerary.
//   AddrMode      - operand class of the address operand $Rn.
// Copies Rn{5-4} into Inst{5-4} and is decoded by DecodeVLDST2Instruction.
// No Sched<> is attached here; the defs that instantiate this class add it.
842 class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
843 InstrItinClass itin, Operand AddrMode>
844 : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
845 (ins AddrMode:$Rn), itin,
846 "vld2", Dt, "$Vd, $Rn", "", []> {
848 let Inst{5-4} = Rn{5-4};
849 let DecoderMethod = "DecodeVLDST2Instruction";
// VLD2 into a D-register pair: op11_8 = 0b1000, VecListDPair, IIC_VLD2,
// addrmode6align64or128, Sched<[WriteVLD2]>.
852 def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
853 addrmode6align64or128>, Sched<[WriteVLD2]>;
854 def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
855 addrmode6align64or128>, Sched<[WriteVLD2]>;
856 def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
857 addrmode6align64or128>, Sched<[WriteVLD2]>;
// VLD2 into four D registers: op11_8 = 0b0011, VecListFourD, IIC_VLD2x2,
// addrmode6align64or128or256, Sched<[WriteVLD4]>.
859 def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
860 addrmode6align64or128or256>, Sched<[WriteVLD4]>;
861 def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
862 addrmode6align64or128or256>, Sched<[WriteVLD4]>;
863 def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
864 addrmode6align64or128or256>, Sched<[WriteVLD4]>;
// Pseudos for the four-register forms, built on VLDQQPseudo.
866 def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
867 def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
868 def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
870 // ...with address register writeback:
// VLD2 with address writeback; parameters mirror the VLD2 class. Expands to:
//   _fixed    - input is AddrMode:$Rn only, asm "$Vd, $Rn!", Rm fixed to 0b1101.
//   _register - adds an rGPR:$Rm input, asm "$Vd, $Rn, $Rm".
// Both tie $Rn.addr to the GPR:$wb output, copy Rn{5-4} into Inst{5-4} and are
// decoded by DecodeVLDST2Instruction. Neither record carries a Sched<> itself;
// the defm lines that use this multiclass append one.
871 multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
872 RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> {
873 def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
874 (ins AddrMode:$Rn), itin,
875 "vld2", Dt, "$Vd, $Rn!",
876 "$Rn.addr = $wb", []> {
877 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
878 let Inst{5-4} = Rn{5-4};
879 let DecoderMethod = "DecodeVLDST2Instruction";
881 def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
882 (ins AddrMode:$Rn, rGPR:$Rm), itin,
883 "vld2", Dt, "$Vd, $Rn, $Rm",
884 "$Rn.addr = $wb", []> {
885 let Inst{5-4} = Rn{5-4};
886 let DecoderMethod = "DecodeVLDST2Instruction";
// Writeback VLD2 into a D-register pair (op11_8 = 0b1000, IIC_VLD2u,
// Sched<[WriteVLD2]>).
890 defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
891 addrmode6align64or128>, Sched<[WriteVLD2]>;
892 defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
893 addrmode6align64or128>, Sched<[WriteVLD2]>;
894 defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
895 addrmode6align64or128>, Sched<[WriteVLD2]>;
// Writeback VLD2 into four D registers (op11_8 = 0b0011, IIC_VLD2x2u,
// Sched<[WriteVLD4]>).
897 defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
898 addrmode6align64or128or256>, Sched<[WriteVLD4]>;
899 defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
900 addrmode6align64or128or256>, Sched<[WriteVLD4]>;
901 defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
902 addrmode6align64or128or256>, Sched<[WriteVLD4]>;
// Writeback pseudos for the four-register forms, one _fixed and one _register
// pseudo per element size.
904 def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
905 def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
906 def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
907 def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
908 def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
909 def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
911 // ...with double-spaced registers
// Same shape as the VLD2d* definitions but with op11_8 = 0b1001 and the
// VecListDPairSpaced register-list operand. The first three are non-writeback
// (IIC_VLD2); the defm lines are the writeback forms (IIC_VLD2u).
912 def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2,
913 addrmode6align64or128>, Sched<[WriteVLD2]>;
914 def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
915 addrmode6align64or128>, Sched<[WriteVLD2]>;
916 def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
917 addrmode6align64or128>, Sched<[WriteVLD2]>;
918 defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u,
919 addrmode6align64or128>, Sched<[WriteVLD2]>;
920 defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u,
921 addrmode6align64or128>, Sched<[WriteVLD2]>;
922 defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u,
923 addrmode6align64or128>, Sched<[WriteVLD2]>;
925 // VLD3 : Vector Load (multiple 3-element structures)
// Base class for VLD3 without writeback.
//   op11_8, op7_4 - forwarded to NLdSt.
//   Dt            - data-type suffix string used in the asm.
// Has three separate DPR outputs ($Vd, $dst2, $dst3) printed as a braced list,
// takes addrmode6:$Rn, uses IIC_VLD3 / Sched<[WriteVLD3]> and is decoded by
// DecodeVLDST3Instruction.
926 class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
927 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
928 (ins addrmode6:$Rn), IIC_VLD3,
929 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> {
932 let DecoderMethod = "DecodeVLDST3Instruction";
// VLD3 instantiations with op11_8 = 0b0100 for 8/16/32-bit element sizes.
935 def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
936 def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
937 def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;
// Matching pseudos built on VLDQQPseudo.
939 def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
940 def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
941 def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
943 // ...with address register writeback:
// VLD3 with address writeback. Adds a GPR:$wb output tied to $Rn.addr and an
// am6offset:$Rm input that is printed directly after $Rn ("$Rn$Rm"). Uses
// IIC_VLD3u / Sched<[WriteVLD3]> and is decoded by DecodeVLDST3Instruction.
944 class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
945 : NLdSt<0, 0b10, op11_8, op7_4,
946 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
947 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
948 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
949 "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
951 let DecoderMethod = "DecodeVLDST3Instruction";
// Writeback VLD3 instantiations with op11_8 = 0b0100.
954 def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
955 def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
956 def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
// Matching writeback pseudos built on VLDQQWBPseudo.
958 def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
959 def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
960 def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
962 // ...with double-spaced registers:
// These reuse VLD3D / VLD3DWB and differ from the d forms only in
// op11_8 (0b0101 instead of 0b0100).
963 def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">;
964 def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
965 def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
966 def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">;
967 def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
968 def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
// Writeback pseudos built on VLDQQQQWBPseudo.
970 def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
971 def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
972 def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
974 // ...alternate versions to be allocated odd register numbers:
975 def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
976 def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
977 def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
979 def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
980 def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
981 def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
983 // VLD4 : Vector Load (multiple 4-element structures)
// Base class for VLD4 without writeback.
//   op11_8, op7_4 - forwarded to NLdSt.
//   Dt            - data-type suffix string used in the asm.
// Has four separate DPR outputs ($Vd, $dst2, $dst3, $dst4) printed as a braced
// list, takes addrmode6:$Rn, uses IIC_VLD4, copies Rn{5-4} into Inst{5-4} and
// is decoded by DecodeVLDST4Instruction.
984 class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
985 : NLdSt<0, 0b10, op11_8, op7_4,
986 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
987 (ins addrmode6:$Rn), IIC_VLD4,
988 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>,
991 let Inst{5-4} = Rn{5-4};
992 let DecoderMethod = "DecodeVLDST4Instruction";
// VLD4 instantiations with op11_8 = 0b0000 for 8/16/32-bit element sizes.
995 def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">;
996 def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
997 def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;
// Matching pseudos built on VLDQQPseudo.
999 def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
1000 def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
1001 def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
1003 // ...with address register writeback:
// VLD4 with address writeback. Adds a GPR:$wb output tied to $Rn.addr and an
// am6offset:$Rm input that is printed directly after $Rn ("$Rn$Rm"). Uses
// IIC_VLD4u / Sched<[WriteVLD4]>, copies Rn{5-4} into Inst{5-4} and is decoded
// by DecodeVLDST4Instruction.
1004 class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1005 : NLdSt<0, 0b10, op11_8, op7_4,
1006 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
1007 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
1008 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
1009 "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
1010 let Inst{5-4} = Rn{5-4};
1011 let DecoderMethod = "DecodeVLDST4Instruction";
// Writeback VLD4 instantiations with op11_8 = 0b0000.
1014 def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">;
1015 def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
1016 def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
// Matching writeback pseudos built on VLDQQWBPseudo.
1018 def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1019 def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1020 def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1022 // ...with double-spaced registers:
// These reuse VLD4D / VLD4DWB and differ from the d forms only in
// op11_8 (0b0001 instead of 0b0000).
1023 def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">;
1024 def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
1025 def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;
1026 def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">;
1027 def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
1028 def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
// Writeback pseudos built on VLDQQQQWBPseudo.
1030 def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1031 def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1032 def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1034 // ...alternate versions to be allocated odd register numbers:
1035 def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
1036 def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
1037 def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
1039 def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1040 def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1041 def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1043 } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1045 // Classes for VLD*LN pseudo-instructions with multi-register operands.
1046 // These are expanded to real instructions after register allocation.
// Lane-load pseudo on a QPR value: takes an address, the incoming QPR:$src and
// a lane index, and produces QPR:$dst with $src tied to $dst.
1047 class VLDQLNPseudo<InstrItinClass itin>
1048 : PseudoNLdSt<(outs QPR:$dst),
1049 (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
1050 itin, "$src = $dst">;
// Writeback form of the QPR lane-load pseudo: adds an am6offset:$offset input
// and a GPR:$wb output; $addr.addr is tied to $wb and $src to $dst.
1051 class VLDQLNWBPseudo<InstrItinClass itin>
1052 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
1053 (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
1054 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
// Lane-load pseudo on a QQPR value: takes an address, the incoming QQPR:$src
// and a lane index, and produces QQPR:$dst with $src tied to $dst.
1055 class VLDQQLNPseudo<InstrItinClass itin>
1056 : PseudoNLdSt<(outs QQPR:$dst),
1057 (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
1058 itin, "$src = $dst">;
// Writeback form of the QQPR lane-load pseudo: adds an am6offset:$offset input
// and a GPR:$wb output; $addr.addr is tied to $wb and $src to $dst.
1059 class VLDQQLNWBPseudo<InstrItinClass itin>
1060 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
1061 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
1062 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
// Lane-load pseudo on a QQQQPR value: takes an address, the incoming
// QQQQPR:$src and a lane index, and produces QQQQPR:$dst with $src tied to $dst.
1063 class VLDQQQQLNPseudo<InstrItinClass itin>
1064 : PseudoNLdSt<(outs QQQQPR:$dst),
1065 (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
1066 itin, "$src = $dst">;
// Writeback form of the QQQQPR lane-load pseudo: adds an am6offset:$offset
// input and a GPR:$wb output; $addr.addr is tied to $wb and $src to $dst.
1067 class VLDQQQQLNWBPseudo<InstrItinClass itin>
1068 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
1069 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
1070 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
1072 // VLD1LN : Vector Load (single element to one lane)
// Single-lane VLD1 into a D register. Inputs are addrmode6:$Rn, the incoming
// DPR:$src and a lane index; the asm prints "{$Vd[$lane]}, $Rn". The selection
// pattern is a vector_insert into (Ty DPR:$src) of the i32 value produced by
// LoadOp on $Rn. Decoded by DecodeVLD1LN.
1073 class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
1075 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
1076 (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
1077 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
1079 [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
1080 (i32 (LoadOp addrmode6:$Rn)),
1083 let DecoderMethod = "DecodeVLD1LN";
// Variant of VLD1LN whose address operand is addrmode6oneL32 instead of
// addrmode6 (both in the input list and in the selection pattern). The pattern
// inserts the i32 LoadOp result into lane imm:$lane of (Ty DPR:$src).
// Scheduled as WriteVLD1 and decoded by DecodeVLD1LN.
1085 class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
1087 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
1088 (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
1089 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
1091 [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
1092 (i32 (LoadOp addrmode6oneL32:$Rn)),
1093 imm:$lane))]>, Sched<[WriteVLD1]> {
1095 let DecoderMethod = "DecodeVLD1LN";
// Q-register pseudo for single-lane VLD1: derives from VLDQLNPseudo with the
// IIC_VLD1ln itinerary and Sched<[WriteVLD1]>, and overrides Pattern with a
// vector_insert into (Ty QPR:$src) of the i32 value produced by LoadOp on $addr.
1097 class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln>,
1098 Sched<[WriteVLD1]> {
1099 let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
1100 (i32 (LoadOp addrmode6:$addr)),
// Per-element-size single-lane VLD1 instantiations. Each writes the lane index
// into the top of Inst{7-4}: 8-bit uses Inst{7-5}, 16-bit Inst{7-6}, 32-bit
// Inst{7}. The 16- and 32-bit forms additionally copy Rn{5-4} into Inst{5-4}.
// The 8/16-bit forms use extending loads; the 32-bit form uses plain `load`
// via the VLD1LN32 class.
1104 def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
1105 let Inst{7-5} = lane{2-0};
1107 def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
1108 let Inst{7-6} = lane{1-0};
1109 let Inst{5-4} = Rn{5-4};
1111 def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
1112 let Inst{7} = lane{0};
1113 let Inst{5-4} = Rn{5-4};
// Q-register pseudos with the matching 128-bit vector type and load fragment.
1116 def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>;
1117 def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
1118 def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
// Floating-point lane inserts reuse the 32-bit integer instructions: inserting
// a loaded f32 into a v2f32 selects VLD1LNd32, and into a v4f32 selects
// VLD1LNq32Pseudo.
1120 def : Pat<(vector_insert (v2f32 DPR:$src),
1121 (f32 (load addrmode6:$addr)), imm:$lane),
1122 (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
1123 def : Pat<(vector_insert (v4f32 QPR:$src),
1124 (f32 (load addrmode6:$addr)), imm:$lane),
1125 (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
1127 // A 64-bit subvector insert to the first 128-bit vector position
1128 // is a subregister copy that needs no instruction.
// One pattern per 64-bit source type (v1i64, v2i32, v2f32, v4i16, v4f16, v8i8).
// Each matches insert_subvector into undef at index 0 and emits INSERT_SUBREG
// of $src into dsub_0 of an IMPLICIT_DEF value of the corresponding 128-bit type.
// NOTE(review): only the v8i8 pattern gives its undef operand an explicit type
// ((v16i8 undef)); the others use a bare undef -- confirm this is intended.
1129 def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)),
1130 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1131 def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)),
1132 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1133 def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)),
1134 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1135 def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)),
1136 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1137 def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)),
1138 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1139 def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)),
1140 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1143 let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
1145 // ...with address register writeback:
// Single-lane VLD1 with address writeback. Adds a GPR:$wb output and an
// am6offset:$Rm input printed directly after $Rn ("$Rn$Rm"); $src is tied to
// $Vd and $Rn.addr to $wb. Has no selection pattern ([]), uses IIC_VLD1lnu /
// Sched<[WriteVLD1]> and is decoded by DecodeVLD1LN.
1146 class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1147 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
1148 (ins addrmode6:$Rn, am6offset:$Rm,
1149 DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
1150 "\\{$Vd[$lane]\\}, $Rn$Rm",
1151 "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1152 let DecoderMethod = "DecodeVLD1LN";
// Writeback single-lane VLD1 instantiations; lane-index placement matches the
// non-writeback forms (Inst{7-5}, Inst{7-6}, Inst{7}).
// NOTE(review): the address bits differ from the non-writeback defs, which use
// Inst{5-4} = Rn{5-4}: the 16-bit form sets only Inst{4} = Rn{4}, and the
// 32-bit form writes Rn{4} into both Inst{5} and Inst{4} -- confirm intended.
1155 def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
1156 let Inst{7-5} = lane{2-0};
1158 def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
1159 let Inst{7-6} = lane{1-0};
1160 let Inst{4} = Rn{4};
1162 def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
1163 let Inst{7} = lane{0};
1164 let Inst{5} = Rn{4};
1165 let Inst{4} = Rn{4};
// Q-register writeback pseudos built on VLDQLNWBPseudo.
1168 def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
1169 def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
1170 def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
1172 // VLD2LN : Vector Load (single 2-element structure to one lane)
// Single-lane VLD2 without writeback. Two DPR outputs ($Vd, $dst2) are tied to
// the two DPR inputs ($src1, $src2); the asm prints both with the same [$lane]
// index. Copies Rn{4} into Inst{4}, uses IIC_VLD2ln / Sched<[WriteVLD1]> and is
// decoded by DecodeVLD2LN.
1173 class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1174 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
1175 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
1176 IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
1177 "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> {
1179 let Inst{4} = Rn{4};
1180 let DecoderMethod = "DecodeVLD2LN";
// Single-lane VLD2 instantiations; the lane index goes into Inst{7-5} (8-bit),
// Inst{7-6} (16-bit) or Inst{7} (32-bit).
1183 def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> {
1184 let Inst{7-5} = lane{2-0};
1186 def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
1187 let Inst{7-6} = lane{1-0};
1189 def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
1190 let Inst{7} = lane{0};
// Pseudos built on VLDQLNPseudo.
1193 def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1194 def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1195 def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1197 // ...with double-spaced registers:
// These differ from the d forms by a fixed 1 in op7_4: {?,?,1,?} for 16-bit
// and {?,1,0,?} for 32-bit. There is no 8-bit double-spaced form here.
1198 def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
1199 let Inst{7-6} = lane{1-0};
1201 def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
1202 let Inst{7} = lane{0};
// Pseudos built on VLDQQLNPseudo.
1205 def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1206 def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1208 // ...with address register writeback:
// Single-lane VLD2 with address writeback. Adds a GPR:$wb output tied to
// $Rn.addr and an am6offset:$Rm input printed directly after $Rn. Copies Rn{4}
// into Inst{4} and is decoded by DecodeVLD2LN.
// NOTE(review): unlike VLD2LN (Sched<[WriteVLD1]>), this class attaches no
// Sched<> -- confirm whether that omission is intended.
1209 class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1210 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
1211 (ins addrmode6:$Rn, am6offset:$Rm,
1212 DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
1213 "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
1214 "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
1215 let Inst{4} = Rn{4};
1216 let DecoderMethod = "DecodeVLD2LN";
// Writeback single-lane VLD2 instantiations; op11_8 / op7_4 and lane-bit
// placement mirror the non-writeback VLD2LNd* definitions.
1219 def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
1220 let Inst{7-5} = lane{2-0};
1222 def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
1223 let Inst{7-6} = lane{1-0};
1225 def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
1226 let Inst{7} = lane{0};
// Writeback pseudos built on VLDQLNWBPseudo.
1229 def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1230 def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1231 def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
// Writeback counterparts of VLD2LNq16 / VLD2LNq32 (same op7_4 values).
1233 def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
1234 let Inst{7-6} = lane{1-0};
1236 def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
1237 let Inst{7} = lane{0};
// Writeback pseudos built on VLDQQLNWBPseudo.
1240 def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1241 def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1243 // VLD3LN : Vector Load (single 3-element structure to one lane)
// Single-lane VLD3 without writeback. Three DPR outputs ($Vd, $dst2, $dst3)
// are tied to three DPR inputs ($src1..$src3); the asm prints all three with
// the same [$lane] index. Uses IIC_VLD3ln / Sched<[WriteVLD2]> and is decoded
// by DecodeVLD3LN.
1244 class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1245 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
1246 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
1247 nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
1248 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
1249 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> {
1251 let DecoderMethod = "DecodeVLD3LN";
// Single-lane VLD3 instantiations; the lane index goes into Inst{7-5} (8-bit),
// Inst{7-6} (16-bit) or Inst{7} (32-bit). The lowest op7_4 bit is fixed to 0
// in every variant.
1254 def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> {
1255 let Inst{7-5} = lane{2-0};
1257 def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
1258 let Inst{7-6} = lane{1-0};
1260 def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
1261 let Inst{7} = lane{0};
// Pseudos built on VLDQQLNPseudo.
1264 def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1265 def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1266 def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1268 // ...with double-spaced registers:
// These differ from the d forms by a fixed 1 in op7_4: {?,?,1,0} for 16-bit
// and {?,1,0,0} for 32-bit.
1269 def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
1270 let Inst{7-6} = lane{1-0};
1272 def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
1273 let Inst{7} = lane{0};
// Pseudos built on VLDQQQQLNPseudo.
1276 def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1277 def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1279 // ...with address register writeback:
// Single-lane VLD3 with address writeback. Adds a GPR:$wb output tied to
// $Rn.addr and an am6offset:$Rm input printed directly after $Rn. Uses
// IIC_VLD3lnu / Sched<[WriteVLD2]> and is decoded by DecodeVLD3LN.
1280 class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1281 : NLdStLn<1, 0b10, op11_8, op7_4,
1282 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
1283 (ins addrmode6:$Rn, am6offset:$Rm,
1284 DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
1285 IIC_VLD3lnu, "vld3", Dt,
1286 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
1287 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
1288 []>, Sched<[WriteVLD2]> {
1289 let DecoderMethod = "DecodeVLD3LN";
// Writeback single-lane VLD3 instantiations; op11_8 / op7_4 and lane-bit
// placement mirror the non-writeback VLD3LNd* definitions.
1292 def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
1293 let Inst{7-5} = lane{2-0};
1295 def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
1296 let Inst{7-6} = lane{1-0};
1298 def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
1299 let Inst{7} = lane{0};
// Writeback pseudos built on VLDQQLNWBPseudo.
1302 def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1303 def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1304 def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
// Writeback counterparts of VLD3LNq16 / VLD3LNq32 (same op7_4 values).
1306 def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
1307 let Inst{7-6} = lane{1-0};
1309 def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
1310 let Inst{7} = lane{0};
// Writeback pseudos built on VLDQQQQLNWBPseudo.
1313 def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1314 def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1316 // VLD4LN : Vector Load (single 4-element structure to one lane)
// Single-lane VLD4 without writeback. Four DPR outputs ($Vd, $dst2..$dst4) are
// tied to four DPR inputs ($src1..$src4); the asm prints all four with the
// same [$lane] index. Copies Rn{4} into Inst{4}, uses IIC_VLD4ln /
// Sched<[WriteVLD2]> and is decoded by DecodeVLD4LN.
1317 class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1318 : NLdStLn<1, 0b10, op11_8, op7_4,
1319 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
1320 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
1321 nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
1322 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
1323 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>,
1324 Sched<[WriteVLD2]> {
1326 let Inst{4} = Rn{4};
1327 let DecoderMethod = "DecodeVLD4LN";
// Single-lane VLD4 instantiations; the lane index goes into Inst{7-5} (8-bit),
// Inst{7-6} (16-bit) or Inst{7} (32-bit). The 32-bit forms also copy Rn{5}
// into Inst{5}, in addition to the Inst{4} = Rn{4} set by the class.
1330 def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> {
1331 let Inst{7-5} = lane{2-0};
1333 def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
1334 let Inst{7-6} = lane{1-0};
1336 def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
1337 let Inst{7} = lane{0};
1338 let Inst{5} = Rn{5};
// Pseudos built on VLDQQLNPseudo.
1341 def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1342 def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1343 def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1345 // ...with double-spaced registers:
// These differ from the d forms by a fixed 1 in op7_4: {?,?,1,?} for 16-bit
// and {?,1,?,?} for 32-bit.
1346 def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
1347 let Inst{7-6} = lane{1-0};
1349 def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
1350 let Inst{7} = lane{0};
1351 let Inst{5} = Rn{5};
// Pseudos built on VLDQQQQLNPseudo.
1354 def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1355 def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1357 // ...with address register writeback:
// Single-lane VLD4 with address writeback. Adds a GPR:$wb output tied to
// $Rn.addr and an am6offset:$Rm input printed directly after $Rn. Copies Rn{4}
// into Inst{4}, uses IIC_VLD4lnu and is decoded by DecodeVLD4LN.
1358 class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1359 : NLdStLn<1, 0b10, op11_8, op7_4,
1360 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
1361 (ins addrmode6:$Rn, am6offset:$Rm,
1362 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
1363 IIC_VLD4lnu, "vld4", Dt,
1364 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
1365 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
1367 let Inst{4} = Rn{4};
1368 let DecoderMethod = "DecodeVLD4LN" ;
// Writeback single-lane VLD4 instantiations; op11_8 / op7_4, lane-bit placement
// and the extra Inst{5} = Rn{5} on 32-bit forms mirror the non-writeback defs.
1371 def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
1372 let Inst{7-5} = lane{2-0};
1374 def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
1375 let Inst{7-6} = lane{1-0};
1377 def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
1378 let Inst{7} = lane{0};
1379 let Inst{5} = Rn{5};
// Writeback pseudos built on VLDQQLNWBPseudo.
1382 def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1383 def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1384 def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
// Writeback counterparts of VLD4LNq16 / VLD4LNq32 (same op7_4 values).
1386 def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
1387 let Inst{7-6} = lane{1-0};
1389 def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
1390 let Inst{7} = lane{0};
1391 let Inst{5} = Rn{5};
// Writeback pseudos built on VLDQQQQLNWBPseudo.
1394 def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1395 def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1397 } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1399 // VLD1DUP : Vector Load (single element to all lanes)
// VLD1 "all lanes" form producing VecListOneDAllLanes:$Vd. The selection
// pattern matches NEONvdup of the i32 value produced by LoadOp on AddrMode:$Rn,
// typed as Ty. Copies Rn{4} into Inst{4}, uses IIC_VLD1dup /
// Sched<[WriteVLD2]> and is decoded by DecodeVLD1DupInstruction.
1400 class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
1402 : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
1404 IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
1405 [(set VecListOneDAllLanes:$Vd,
1406 (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
1407 Sched<[WriteVLD2]> {
1409 let Inst{4} = Rn{4};
1410 let DecoderMethod = "DecodeVLD1DupInstruction";
// VLD1DUP instantiations for 8/16/32-bit elements. The addressing-mode operand
// varies with element size: addrmode6dupalignNone, addrmode6dupalign16,
// addrmode6dupalign32.
1412 def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8,
1413 addrmode6dupalignNone>;
1414 def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
1415 addrmode6dupalign16>;
1416 def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
1417 addrmode6dupalign32>;
// A v2f32 NEONvdup of a loaded f32 reuses VLD1DUPd32.
// NOTE(review): the source side matches addrmode6dup:$addr while the result
// side names addrmode6:$addr -- confirm the operand-class mismatch is intended.
1419 def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
1420 (VLD1DUPd32 addrmode6:$addr)>;
// VLD1 "all lanes" form producing VecListDPairAllLanes:$Vd. The selection
// pattern matches NEONvdup of the i32 value produced by LoadOp on AddrMode:$Rn,
// typed as Ty. Copies Rn{4} into Inst{4} and is decoded by
// DecodeVLD1DupInstruction.
// NOTE(review): no Sched<> is attached here, whereas VLD1DUP carries
// Sched<[WriteVLD2]> -- confirm whether that omission is intended.
1422 class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
1424 : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
1425 (ins AddrMode:$Rn), IIC_VLD1dup,
1426 "vld1", Dt, "$Vd, $Rn", "",
1427 [(set VecListDPairAllLanes:$Vd,
1428 (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
1430 let Inst{4} = Rn{4};
1431 let DecoderMethod = "DecodeVLD1DupInstruction";
// VLD1QDUP instantiations for 8/16/32-bit elements; op7_4 has its second-lowest
// bit set to 1 in every variant, and the 8-bit form also fixes the lowest bit
// to 0.
1434 def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8,
1435 addrmode6dupalignNone>;
1436 def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
1437 addrmode6dupalign16>;
1438 def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
1439 addrmode6dupalign32>;
// A v4f32 NEONvdup of a loaded f32 reuses VLD1DUPq32.
// NOTE(review): the source side matches addrmode6dup:$addr while the result
// side names addrmode6:$addr -- confirm the operand-class mismatch is intended.
1441 def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
1442 (VLD1DUPq32 addrmode6:$addr)>;
1444 let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
1445 // ...with address register writeback:
// VLD1 "all lanes" (VecListOneDAllLanes) with address writeback. Expands to:
//   _fixed    - input is AddrMode:$Rn only, asm "$Vd, $Rn!", Rm fixed to 0b1101.
//   _register - adds an rGPR:$Rm input, asm "$Vd, $Rn, $Rm".
// Both tie $Rn.addr to the GPR:$wb output, copy Rn{4} into Inst{4}, have no
// selection pattern and are decoded by DecodeVLD1DupInstruction.
// NOTE(review): neither record attaches a Sched<> -- confirm intended.
1446 multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1447 def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
1448 (outs VecListOneDAllLanes:$Vd, GPR:$wb),
1449 (ins AddrMode:$Rn), IIC_VLD1dupu,
1450 "vld1", Dt, "$Vd, $Rn!",
1451 "$Rn.addr = $wb", []> {
1452 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1453 let Inst{4} = Rn{4};
1454 let DecoderMethod = "DecodeVLD1DupInstruction";
1456 def _register : NLdSt<1, 0b10, 0b1100, op7_4,
1457 (outs VecListOneDAllLanes:$Vd, GPR:$wb),
1458 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
1459 "vld1", Dt, "$Vd, $Rn, $Rm",
1460 "$Rn.addr = $wb", []> {
1461 let Inst{4} = Rn{4};
1462 let DecoderMethod = "DecodeVLD1DupInstruction";
// VLD1 "all lanes" (VecListDPairAllLanes) with address writeback. Expands to:
//   _fixed    - input is AddrMode:$Rn only, asm "$Vd, $Rn!", Rm fixed to 0b1101.
//   _register - adds an rGPR:$Rm input, asm "$Vd, $Rn, $Rm".
// Both tie $Rn.addr to the GPR:$wb output, copy Rn{4} into Inst{4}, have no
// selection pattern and are decoded by DecodeVLD1DupInstruction.
// NOTE(review): _fixed carries Sched<[WriteVLD1]> but _register carries no
// Sched<> at all -- confirm whether the asymmetry is intended.
1465 multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1466 def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
1467 (outs VecListDPairAllLanes:$Vd, GPR:$wb),
1468 (ins AddrMode:$Rn), IIC_VLD1dupu,
1469 "vld1", Dt, "$Vd, $Rn!",
1470 "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1471 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1472 let Inst{4} = Rn{4};
1473 let DecoderMethod = "DecodeVLD1DupInstruction";
1475 def _register : NLdSt<1, 0b10, 0b1100, op7_4,
1476 (outs VecListDPairAllLanes:$Vd, GPR:$wb),
1477 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
1478 "vld1", Dt, "$Vd, $Rn, $Rm",
1479 "$Rn.addr = $wb", []> {
1480 let Inst{4} = Rn{4};
1481 let DecoderMethod = "DecodeVLD1DupInstruction";
// Writeback VLD1 "all lanes" instantiations (single-D list, then D-pair list).
// NOTE(review): VLD1DUPd8wb fixes the low op7_4 bit to 0 ({0,0,0,0}) while the
// non-writeback VLD1DUPd8 leaves it unset ({0,0,0,?}) -- confirm intended.
1485 defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
1486 defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
1487 defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;
1489 defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
1490 defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
1491 defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;
1493 // VLD2DUP : Vector Load (single 2-element structure to all lanes)
// Base class for VLD2 "all lanes" without writeback.
//   op7_4    - forwarded to NLdSt (op11_8 is fixed to 0b1101).
//   Dt       - data-type suffix string used in the asm.
//   VdTy     - register-list operand class of the $Vd output.
//   AddrMode - operand class of the address operand $Rn.
// Has no selection pattern, copies Rn{4} into Inst{4}, uses IIC_VLD2dup and is
// decoded by DecodeVLD2DupInstruction. No Sched<> is attached in this class.
1494 class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
1495 : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
1496 (ins AddrMode:$Rn), IIC_VLD2dup,
1497 "vld2", Dt, "$Vd, $Rn", "", []> {
1499 let Inst{4} = Rn{4};
1500 let DecoderMethod = "DecodeVLD2DupInstruction";
// VLD2DUP instantiations on VecListDPairAllLanes. The addressing-mode operand
// varies with element size: addrmode6dupalign16 (8-bit), addrmode6dupalign32
// (16-bit), addrmode6dupalign64 (32-bit).
1503 def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes,
1504 addrmode6dupalign16>;
1505 def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
1506 addrmode6dupalign32>;
1507 def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
1508 addrmode6dupalign64>;
1510 // HACK: VLD2DUPd8x2 must be changed at the same time as VLD2b8, otherwise
1511 // "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
1512 // ...with double-spaced registers
// VLD2DUP instantiations on VecListDPairSpacedAllLanes; op7_4 has its
// second-lowest bit set to 1, otherwise the arguments match the VLD2DUPd* defs.
1513 def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes,
1514 addrmode6dupalign16>;
1515 def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
1516 addrmode6dupalign32>;
1517 def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
1518 addrmode6dupalign64>;
// Even/Odd pseudos per element size, built on VLDQQPseudo with IIC_VLD2dup and
// Sched<[WriteVLD2]>.
1520 def VLD2DUPq8EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1521 def VLD2DUPq8OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1522 def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1523 def VLD2DUPq16OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1524 def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1525 def VLD2DUPq32OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1527 // ...with address register writeback:
// VLD2 "all lanes" with address writeback. Expands to:
//   _fixed    - input is AddrMode:$Rn only, asm "$Vd, $Rn!", Rm fixed to 0b1101.
//   _register - adds an rGPR:$Rm input, asm "$Vd, $Rn, $Rm".
// Both tie $Rn.addr to the GPR:$wb output, copy Rn{4} into Inst{4}, use
// IIC_VLD2dupu / Sched<[WriteVLD1]> and are decoded by DecodeVLD2DupInstruction.
1528 multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
1530 def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
1531 (outs VdTy:$Vd, GPR:$wb),
1532 (ins AddrMode:$Rn), IIC_VLD2dupu,
1533 "vld2", Dt, "$Vd, $Rn!",
1534 "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1535 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1536 let Inst{4} = Rn{4};
1537 let DecoderMethod = "DecodeVLD2DupInstruction";
1539 def _register : NLdSt<1, 0b10, 0b1101, op7_4,
1540 (outs VdTy:$Vd, GPR:$wb),
1541 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
1542 "vld2", Dt, "$Vd, $Rn, $Rm",
1543 "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1544 let Inst{4} = Rn{4};
1545 let DecoderMethod = "DecodeVLD2DupInstruction";
// Writeback VLD2 "all lanes" instantiations on VecListDPairAllLanes.
// NOTE(review): the 8-bit writeback forms fix the low op7_4 bit to 0
// ({0,0,0,0} / {0,0,1,0}) while the non-writeback VLD2DUPd8 / VLD2DUPd8x2 leave
// it unset ({0,0,0,?} / {0,0,1,?}) -- confirm intended.
1549 defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes,
1550 addrmode6dupalign16>;
1551 defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
1552 addrmode6dupalign32>;
1553 defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
1554 addrmode6dupalign64>;
// Writeback instantiations on VecListDPairSpacedAllLanes.
1556 defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes,
1557 addrmode6dupalign16>;
1558 defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
1559 addrmode6dupalign32>;
1560 defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
1561 addrmode6dupalign64>;
1563 // VLD3DUP : Vector Load (single 3-element structure to all lanes)
// VLD3DUP: base class for the non-writeback vld3 all-lanes load.
//   op7_4 - forwarded to NLdSt as the fourth argument.
//   Dt    - data-type string placed after the "vld3" mnemonic.
// Defines three DPR outputs ($Vd, $dst2, $dst3), takes an addrmode6dup
// address, uses the IIC_VLD3dup itinerary with WriteVLD2, and decodes through
// DecodeVLD3DupInstruction.
1564 class VLD3DUP<bits<4> op7_4, string Dt>
1565 : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
1566 (ins addrmode6dup:$Rn), IIC_VLD3dup,
1567 "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
1568 Sched<[WriteVLD2]> {
1571 let DecoderMethod = "DecodeVLD3DupInstruction";
// VLD3DUP instantiations for 8/16/32-bit elements (third op7_4 element is 0).
1574 def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">;
1575 def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
1576 def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
// Matching pseudo-instructions, built on VLDQQPseudo with the same
// itinerary (IIC_VLD3dup) and scheduling class (WriteVLD2).
1578 def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1579 def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1580 def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1582 // ...with double-spaced registers (not used for codegen):
// Double-spaced VLD3DUP instantiations: identical to the d8/d16/d32 forms
// except that the third op7_4 element is 1.
1583 def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">;
1584 def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
1585 def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;
// Even/Odd pseudo-instructions for the q forms, built on VLDQQQQPseudo.
1587 def VLD3DUPq8EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1588 def VLD3DUPq8OddPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1589 def VLD3DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1590 def VLD3DUPq16OddPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1591 def VLD3DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1592 def VLD3DUPq32OddPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1594 // ...with address register writeback:
// VLD3DUPWB: writeback variant of the vld3 all-lanes load.
//   op7_4    - forwarded to NLdSt as the fourth argument.
//   Dt       - data-type string placed after the "vld3" mnemonic.
//   AddrMode - operand class used for the $Rn address.
// Adds a GPR:$wb output constrained to "$Rn.addr = $wb" and an
// am6offset:$Rm input that is printed directly after $Rn.
1595 class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
1596 : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
1597 (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
1598 "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
1599 "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
1601 let DecoderMethod = "DecodeVLD3DupInstruction";
// Writeback VLD3DUP instantiations. Every form passes addrmode6dupalign64 as
// the address operand; the 8-bit forms fix the last op7_4 element to 0.
1604 def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>;
1605 def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
1606 def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;
// Double-spaced forms (third op7_4 element is 1).
1608 def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>;
1609 def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
1610 def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;
// Writeback pseudo-instructions for the d forms, built on VLDQQWBPseudo.
1612 def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1613 def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1614 def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1616 // VLD4DUP : Vector Load (single 4-element structure to all lanes)
// VLD4DUP: base class for the non-writeback vld4 all-lanes load.
//   op7_4 - forwarded to NLdSt as the fourth argument.
//   Dt    - data-type string placed after the "vld4" mnemonic.
// Defines four DPR outputs, takes an addrmode6dup address, copies Rn{4} into
// Inst{4}, and decodes through DecodeVLD4DupInstruction.
// NOTE(review): unlike VLD3DUP and VLD4DUPWB, no Sched<> is attached to this
// class - confirm whether that is intentional.
1617 class VLD4DUP<bits<4> op7_4, string Dt>
1618 : NLdSt<1, 0b10, 0b1111, op7_4,
1619 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
1620 (ins addrmode6dup:$Rn), IIC_VLD4dup,
1621 "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
1623 let Inst{4} = Rn{4};
1624 let DecoderMethod = "DecodeVLD4DupInstruction";
// VLD4DUP instantiations for 8/16/32-bit elements. The 32-bit form leaves the
// second op7_4 element unset and instead fills Inst{6} from Rn{5}.
1627 def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">;
1628 def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
1629 def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
// Matching pseudo-instructions, built on VLDQQPseudo.
1631 def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1632 def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1633 def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1635 // ...with double-spaced registers (not used for codegen):
// Double-spaced VLD4DUP instantiations (third op7_4 element is 1). As with the
// d32 form, the 32-bit variant fills Inst{6} from Rn{5}.
1636 def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">;
1637 def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
1638 def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
// Even/Odd pseudo-instructions for the q forms, built on VLDQQQQPseudo.
1640 def VLD4DUPq8EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1641 def VLD4DUPq8OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1642 def VLD4DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1643 def VLD4DUPq16OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1644 def VLD4DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1645 def VLD4DUPq32OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1647 // ...with address register writeback:
// VLD4DUPWB: writeback variant of the vld4 all-lanes load.
//   op7_4 - forwarded to NLdSt as the fourth argument.
//   Dt    - data-type string placed after the "vld4" mnemonic.
// Adds a GPR:$wb output constrained to "$Rn.addr = $wb" and an
// am6offset:$Rm input printed directly after $Rn. Copies Rn{4} into Inst{4}
// and decodes through DecodeVLD4DupInstruction.
1648 class VLD4DUPWB<bits<4> op7_4, string Dt>
1649 : NLdSt<1, 0b10, 0b1111, op7_4,
1650 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
1651 (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
1652 "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
1653 "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
1654 let Inst{4} = Rn{4};
1655 let DecoderMethod = "DecodeVLD4DupInstruction";
// Writeback VLD4DUP instantiations. The 8-bit forms fix the last op7_4
// element to 0; the 32-bit forms fill Inst{6} from Rn{5}.
1658 def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">;
1659 def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
1660 def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
// Double-spaced forms (third op7_4 element is 1).
1662 def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
1663 def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
1664 def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
// Writeback pseudo-instructions for the d forms, built on VLDQQWBPseudo.
1666 def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1667 def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1668 def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1670 } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1672 let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
1674 // Classes for VST* pseudo-instructions with multi-register operands.
1675 // These are expanded to real instructions after register allocation.
// Store pseudo taking a single QPR source and an addrmode6 address; no
// outputs and an empty constraint string.
1676 class VSTQPseudo<InstrItinClass itin>
1677 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
// Writeback form with an am6offset:$offset input; GPR:$wb is constrained to
// $addr.addr.
1678 class VSTQWBPseudo<InstrItinClass itin>
1679 : PseudoNLdSt<(outs GPR:$wb),
1680 (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
1681 "$addr.addr = $wb">;
// Writeback form with no offset operand.
1682 class VSTQWBfixedPseudo<InstrItinClass itin>
1683 : PseudoNLdSt<(outs GPR:$wb),
1684 (ins addrmode6:$addr, QPR:$src), itin,
1685 "$addr.addr = $wb">;
// Writeback form with an rGPR:$offset register operand.
1686 class VSTQWBregisterPseudo<InstrItinClass itin>
1687 : PseudoNLdSt<(outs GPR:$wb),
1688 (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
1689 "$addr.addr = $wb">;
// Store pseudo taking a QQPR source and an addrmode6 address; no outputs.
1690 class VSTQQPseudo<InstrItinClass itin>
1691 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
// Writeback form with an am6offset:$offset input; GPR:$wb is constrained to
// $addr.addr.
1692 class VSTQQWBPseudo<InstrItinClass itin>
1693 : PseudoNLdSt<(outs GPR:$wb),
1694 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
1695 "$addr.addr = $wb">;
// Writeback form with no offset operand.
1696 class VSTQQWBfixedPseudo<InstrItinClass itin>
1697 : PseudoNLdSt<(outs GPR:$wb),
1698 (ins addrmode6:$addr, QQPR:$src), itin,
1699 "$addr.addr = $wb">;
// Writeback form with an rGPR:$offset register operand.
1700 class VSTQQWBregisterPseudo<InstrItinClass itin>
1701 : PseudoNLdSt<(outs GPR:$wb),
1702 (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
1703 "$addr.addr = $wb">;
// Store pseudo taking a QQQQPR source and an addrmode6 address; no outputs.
1705 class VSTQQQQPseudo<InstrItinClass itin>
1706 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
// Writeback form with an am6offset:$offset input; GPR:$wb is constrained to
// $addr.addr.
1707 class VSTQQQQWBPseudo<InstrItinClass itin>
1708 : PseudoNLdSt<(outs GPR:$wb),
1709 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
1710 "$addr.addr = $wb">;
1712 // VST1 : Vector Store (multiple single elements)
// VST1D: non-writeback vst1 of a VecListOneD register list.
//   op7_4    - forwarded to NLdSt as the fourth argument.
//   Dt       - data-type string placed after the "vst1" mnemonic.
//   AddrMode - operand class used for the $Rn address.
// Copies Rn{4} into Inst{4} and decodes through DecodeVLDST1Instruction.
1713 class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
1714 : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
1715 IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> {
1717 let Inst{4} = Rn{4};
1718 let DecoderMethod = "DecodeVLDST1Instruction";
// VST1Q: non-writeback vst1 of a VecListDPair register list.
//   op7_4    - forwarded to NLdSt as the fourth argument.
//   Dt       - data-type string placed after the "vst1" mnemonic.
//   AddrMode - operand class used for the $Rn address.
// Copies Rn{5-4} into Inst{5-4} and decodes through DecodeVLDST1Instruction.
1720 class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
1721 : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
1722 IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> {
1724 let Inst{5-4} = Rn{5-4};
1725 let DecoderMethod = "DecodeVLDST1Instruction";
// VST1D instantiations for 8/16/32/64-bit elements, all with addrmode6align64.
1728 def VST1d8 : VST1D<{0,0,0,?}, "8", addrmode6align64>;
1729 def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>;
1730 def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>;
1731 def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>;
// VST1Q instantiations for 8/16/32/64-bit elements, all with
// addrmode6align64or128; the low two op7_4 elements are left unset.
1733 def VST1q8 : VST1Q<{0,0,?,?}, "8", addrmode6align64or128>;
1734 def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
1735 def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
1736 def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;
1738 // ...with address register writeback:
// VST1DWB: writeback forms of vst1 for a VecListOneD register list. Each
// instantiation produces:
//   _fixed    - asm operands "$Vd, $Rn!"; Rm is hard-wired to 0b1101.
//   _register - asm operands "$Vd, $Rn, $Rm"; takes an extra rGPR:$Rm input.
// Both define GPR:$wb constrained to "$Rn.addr = $wb", copy Rn{4} into
// Inst{4}, and decode through DecodeVLDST1Instruction.
// NOTE(review): _fixed uses the IIC_VLD1u itinerary, whose name suggests a
// load itinerary, for a store - confirm whether a store itinerary was meant.
1739 multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1740 def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
1741 (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
1742 "vst1", Dt, "$Vd, $Rn!",
1743 "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
1744 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1745 let Inst{4} = Rn{4};
1746 let DecoderMethod = "DecodeVLDST1Instruction";
1748 def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
1749 (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
1751 "vst1", Dt, "$Vd, $Rn, $Rm",
1752 "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
1753 let Inst{4} = Rn{4};
1754 let DecoderMethod = "DecodeVLDST1Instruction";
// VST1QWB: writeback forms of vst1 for a VecListDPair register list. Produces
// _fixed ("$Vd, $Rn!", Rm = 0b1101) and _register ("$Vd, $Rn, $Rm", extra
// rGPR:$Rm input). Both define GPR:$wb constrained to "$Rn.addr = $wb", copy
// Rn{5-4} into Inst{5-4}, and decode through DecodeVLDST1Instruction.
// NOTE(review): _fixed uses the IIC_VLD1x2u itinerary, whose name suggests a
// load itinerary, for a store - confirm whether a store itinerary was meant.
1757 multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1758 def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
1759 (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
1760 "vst1", Dt, "$Vd, $Rn!",
1761 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1762 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1763 let Inst{5-4} = Rn{5-4};
1764 let DecoderMethod = "DecodeVLDST1Instruction";
1766 def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
1767 (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
1769 "vst1", Dt, "$Vd, $Rn, $Rm",
1770 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1771 let Inst{5-4} = Rn{5-4};
1772 let DecoderMethod = "DecodeVLDST1Instruction";
// Writeback instantiations of VST1DWB (one-register list, addrmode6align64).
1776 defm VST1d8wb : VST1DWB<{0,0,0,?}, "8", addrmode6align64>;
1777 defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
1778 defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
1779 defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;
// Writeback instantiations of VST1QWB (register pair, addrmode6align64or128).
1781 defm VST1q8wb : VST1QWB<{0,0,?,?}, "8", addrmode6align64or128>;
1782 defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
1783 defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
1784 defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
1786 // ...with 3 registers
// VST1D3: non-writeback vst1 of a VecListThreeD register list.
//   op7_4    - forwarded to NLdSt as the fourth argument.
//   Dt       - data-type string placed after the "vst1" mnemonic.
//   AddrMode - operand class used for the $Rn address.
// Copies Rn{4} into Inst{4} and decodes through DecodeVLDST1Instruction.
// NOTE(review): the VST1D3WB writeback forms copy Rn{5-4} into Inst{5-4},
// while this class copies only bit 4 - confirm the difference is intended.
1787 class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
1788 : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
1789 (ins AddrMode:$Rn, VecListThreeD:$Vd),
1790 IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> {
1792 let Inst{4} = Rn{4};
1793 let DecoderMethod = "DecodeVLDST1Instruction";
// VST1D3WB: writeback forms of vst1 for a VecListThreeD register list.
// Produces _fixed ("$Vd, $Rn!", Rm = 0b1101) and _register ("$Vd, $Rn, $Rm",
// extra rGPR:$Rm input). Both define GPR:$wb constrained to
// "$Rn.addr = $wb", copy Rn{5-4} into Inst{5-4}, and decode through
// DecodeVLDST1Instruction.
// NOTE(review): _fixed uses the IIC_VLD1x3u itinerary, whose name suggests a
// load itinerary, for a store - confirm whether a store itinerary was meant.
1795 multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
1796 def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
1797 (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
1798 "vst1", Dt, "$Vd, $Rn!",
1799 "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
1800 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1801 let Inst{5-4} = Rn{5-4};
1802 let DecoderMethod = "DecodeVLDST1Instruction";
1804 def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
1805 (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
1807 "vst1", Dt, "$Vd, $Rn, $Rm",
1808 "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
1809 let Inst{5-4} = Rn{5-4};
1810 let DecoderMethod = "DecodeVLDST1Instruction";
// Three-register vst1 instantiations for 8/16/32/64-bit elements, all with
// addrmode6align64.
1814 def VST1d8T : VST1D3<{0,0,0,?}, "8", addrmode6align64>;
1815 def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
1816 def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
1817 def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>;
// Writeback counterparts (each defm yields _fixed and _register records).
1819 defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8", addrmode6align64>;
1820 defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
1821 defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
1822 defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;
// Pseudo-instructions for the three-register vst1 forms, built on
// VSTQQPseudo with the IIC_VST1x3 itinerary and WriteVST3.
1824 def VST1d8TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1825 def VST1d16TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1826 def VST1d32TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1827 def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
// Writeback pseudos for the 64-bit form. Note the _register variant is built
// on VSTQQWBPseudo (am6offset operand), not VSTQQWBregisterPseudo.
1828 def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
1829 def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
// High/Low pseudo pairs over QQQQPR sources, one pair per element size. The
// Low*_UPD variants are the writeback (VSTQQQQWBPseudo) half of each pair.
1831 def VST1q8HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1832 def VST1q8LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1833 def VST1q16HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1834 def VST1q16LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1835 def VST1q32HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1836 def VST1q32LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1837 def VST1q64HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1838 def VST1q64LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1840 // ...with 4 registers
// VST1D4: non-writeback vst1 of a VecListFourD register list.
//   op7_4    - forwarded to NLdSt as the fourth argument.
//   Dt       - data-type string placed after the "vst1" mnemonic.
//   AddrMode - operand class used for the $Rn address.
// Copies Rn{5-4} into Inst{5-4} and decodes through DecodeVLDST1Instruction.
1841 class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
1842 : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
1843 (ins AddrMode:$Rn, VecListFourD:$Vd),
1844 IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
1845 []>, Sched<[WriteVST4]> {
1847 let Inst{5-4} = Rn{5-4};
1848 let DecoderMethod = "DecodeVLDST1Instruction";
// VST1D4WB: writeback forms of vst1 for a VecListFourD register list.
// Produces _fixed ("$Vd, $Rn!", Rm = 0b1101) and _register ("$Vd, $Rn, $Rm",
// extra rGPR:$Rm input). Both define GPR:$wb constrained to
// "$Rn.addr = $wb", copy Rn{5-4} into Inst{5-4}, and decode through
// DecodeVLDST1Instruction.
// NOTE(review): _fixed uses the IIC_VLD1x4u itinerary, whose name suggests a
// load itinerary, for a store - confirm whether a store itinerary was meant.
1850 multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
1851 def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
1852 (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
1853 "vst1", Dt, "$Vd, $Rn!",
1854 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1855 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1856 let Inst{5-4} = Rn{5-4};
1857 let DecoderMethod = "DecodeVLDST1Instruction";
1859 def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
1860 (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
1862 "vst1", Dt, "$Vd, $Rn, $Rm",
1863 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1864 let Inst{5-4} = Rn{5-4};
1865 let DecoderMethod = "DecodeVLDST1Instruction";
// Four-register vst1 instantiations for 8/16/32/64-bit elements, all with
// addrmode6align64or128or256.
1869 def VST1d8Q : VST1D4<{0,0,?,?}, "8", addrmode6align64or128or256>;
1870 def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
1871 def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
1872 def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;
// Writeback counterparts (each defm yields _fixed and _register records).
1874 defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>;
1875 defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
1876 defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
1877 defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
// Pseudo-instructions for the four-register vst1 forms, built on VSTQQPseudo
// with the IIC_VST1x4 itinerary and WriteVST4.
1879 def VST1d8QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1880 def VST1d16QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1881 def VST1d32QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1882 def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
// Writeback pseudos for the 64-bit form. Note the _register variant is built
// on VSTQQWBPseudo (am6offset operand), not VSTQQWBregisterPseudo.
1883 def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
1884 def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
// High/Low pseudo pairs over QQQQPR sources, one pair per element size. The
// Low*_UPD variants are the writeback (VSTQQQQWBPseudo) half of each pair.
1886 def VST1q8HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1887 def VST1q8LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1888 def VST1q16HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1889 def VST1q16LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1890 def VST1q32HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1891 def VST1q32LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1892 def VST1q64HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1893 def VST1q64LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1895 // VST2 : Vector Store (multiple 2-element structures)
// VST2: base class for the non-writeback vst2 (multiple 2-element structures).
//   op11_8, op7_4 - forwarded to NLdSt as the third and fourth arguments.
//   Dt            - data-type string placed after the "vst2" mnemonic.
//   VdTy          - register-list operand class for $Vd.
//   itin          - instruction itinerary.
//   AddrMode      - operand class used for the $Rn address.
// Copies Rn{5-4} into Inst{5-4} and decodes through DecodeVLDST2Instruction.
// No Sched<> is attached here; instantiations add their own where they
// want one.
1896 class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
1897 InstrItinClass itin, Operand AddrMode>
1898 : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd),
1899 itin, "vst2", Dt, "$Vd, $Rn", "", []> {
1901 let Inst{5-4} = Rn{5-4};
1902 let DecoderMethod = "DecodeVLDST2Instruction";
// vst2 of a register pair (op11_8 = 0b1000): IIC_VST2 itinerary, WriteVST2,
// addrmode6align64or128.
1905 def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2,
1906 addrmode6align64or128>, Sched<[WriteVST2]>;
1907 def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
1908 addrmode6align64or128>, Sched<[WriteVST2]>;
1909 def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
1910 addrmode6align64or128>, Sched<[WriteVST2]>;
// vst2 of four registers (op11_8 = 0b0011): IIC_VST2x2 itinerary, WriteVST4,
// addrmode6align64or128or256.
1912 def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2,
1913 addrmode6align64or128or256>, Sched<[WriteVST4]>;
1914 def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
1915 addrmode6align64or128or256>, Sched<[WriteVST4]>;
1916 def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
1917 addrmode6align64or128or256>, Sched<[WriteVST4]>;
// Pseudo-instructions for the four-register forms, built on VSTQQPseudo.
1919 def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
1920 def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
1921 def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
1923 // ...with address register writeback:
// VST2DWB: writeback forms of vst2 parameterized on op11_8 and the register
// list class VdTy. Produces _fixed ("$Vd, $Rn!", Rm = 0b1101) and _register
// ("$Vd, $Rn, $Rm", extra rGPR:$Rm input). Both define GPR:$wb constrained to
// "$Rn.addr = $wb", copy Rn{5-4} into Inst{5-4}, and decode through
// DecodeVLDST2Instruction.
// NOTE(review): both records use the IIC_VLD1u itinerary, whose name suggests
// a load itinerary, for a store - confirm whether a store itinerary was meant.
1924 multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
1925 RegisterOperand VdTy, Operand AddrMode> {
1926 def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1927 (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
1928 "vst2", Dt, "$Vd, $Rn!",
1929 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1930 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1931 let Inst{5-4} = Rn{5-4};
1932 let DecoderMethod = "DecodeVLDST2Instruction";
1934 def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1935 (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
1936 "vst2", Dt, "$Vd, $Rn, $Rm",
1937 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1938 let Inst{5-4} = Rn{5-4};
1939 let DecoderMethod = "DecodeVLDST2Instruction";
// VST2QWB: writeback forms of the four-register vst2 (third NLdSt argument
// fixed to 0b0011, VecListFourD register list). Produces _fixed
// ("$Vd, $Rn!", Rm = 0b1101) and _register ("$Vd, $Rn, $Rm", extra
// rGPR:$Rm input). Both define GPR:$wb constrained to "$Rn.addr = $wb", copy
// Rn{5-4} into Inst{5-4}, and decode through DecodeVLDST2Instruction.
// NOTE(review): _fixed uses the IIC_VLD1u itinerary, whose name suggests a
// load itinerary, for a store - confirm whether a store itinerary was meant.
1942 multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1943 def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
1944 (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
1945 "vst2", Dt, "$Vd, $Rn!",
1946 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1947 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1948 let Inst{5-4} = Rn{5-4};
1949 let DecoderMethod = "DecodeVLDST2Instruction";
1951 def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
1952 (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
1954 "vst2", Dt, "$Vd, $Rn, $Rm",
1955 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1956 let Inst{5-4} = Rn{5-4};
1957 let DecoderMethod = "DecodeVLDST2Instruction";
// Writeback instantiations of the register-pair vst2 (op11_8 = 0b1000).
1961 defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair,
1962 addrmode6align64or128>;
1963 defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
1964 addrmode6align64or128>;
1965 defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
1966 addrmode6align64or128>;
// Writeback instantiations of the four-register vst2.
1968 defm VST2q8wb : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>;
1969 defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
1970 defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;
// Writeback pseudo-instructions for the four-register forms: the _fixed
// variants use VSTQQWBfixedPseudo, the _register variants use
// VSTQQWBregisterPseudo; all use IIC_VST2x2u and WriteVST4.
1972 def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1973 def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1974 def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1975 def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1976 def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1977 def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1979 // ...with double-spaced registers
// Double-spaced vst2 forms: op11_8 = 0b1001 with a VecListDPairSpaced
// register list.
// NOTE(review): unlike VST2d8/16/32, these three defs attach no Sched<> (the
// VST2 class itself has none) - confirm whether that is intentional.
1980 def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2,
1981 addrmode6align64or128>;
1982 def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
1983 addrmode6align64or128>;
1984 def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
1985 addrmode6align64or128>;
// Writeback counterparts (each defm yields _fixed and _register records).
1986 defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced,
1987 addrmode6align64or128>;
1988 defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
1989 addrmode6align64or128>;
1990 defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
1991 addrmode6align64or128>;
1993 // VST3 : Vector Store (multiple 3-element structures)
// VST3D: base class for the non-writeback vst3 (multiple 3-element
// structures).
//   op11_8, op7_4 - forwarded to NLdSt as the third and fourth arguments.
//   Dt            - data-type string placed after the "vst3" mnemonic.
// Takes an addrmode6 address and three DPR sources, copies Rn{4} into
// Inst{4}, and decodes through DecodeVLDST3Instruction.
1994 class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
1995 : NLdSt<0, 0b00, op11_8, op7_4, (outs),
1996 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
1997 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> {
1999 let Inst{4} = Rn{4};
2000 let DecoderMethod = "DecodeVLDST3Instruction";
// vst3 instantiations with op11_8 = 0b0100 for 8/16/32-bit elements.
2003 def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">;
2004 def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
2005 def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;
// Matching pseudo-instructions, built on VSTQQPseudo.
2007 def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
2008 def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
2009 def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
2011 // ...with address register writeback:
// VST3DWB: writeback variant of vst3.
//   op11_8, op7_4 - forwarded to NLdSt as the third and fourth arguments.
//   Dt            - data-type string placed after the "vst3" mnemonic.
// Adds a GPR:$wb output constrained to "$Rn.addr = $wb" and an
// am6offset:$Rm input printed directly after $Rn. Copies Rn{4} into Inst{4}
// and decodes through DecodeVLDST3Instruction.
2012 class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2013 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
2014 (ins addrmode6:$Rn, am6offset:$Rm,
2015 DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
2016 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
2017 "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
2018 let Inst{4} = Rn{4};
2019 let DecoderMethod = "DecodeVLDST3Instruction";
// Writeback vst3 instantiations with op11_8 = 0b0100.
2022 def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">;
2023 def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
2024 def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
// Matching writeback pseudo-instructions, built on VSTQQWBPseudo.
2026 def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2027 def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2028 def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2030 // ...with double-spaced registers:
// Double-spaced vst3 forms: same op7_4 values as the d forms but with
// op11_8 = 0b0101, in plain and writeback (_UPD) flavors.
2031 def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">;
2032 def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
2033 def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
2034 def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">;
2035 def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
2036 def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
// Writeback pseudo-instructions over QQQQPR sources.
2038 def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2039 def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2040 def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2042 // ...alternate versions to be allocated odd register numbers:
// "odd" vst3 pseudo-instructions over QQQQPR sources (non-writeback).
2043 def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
2044 def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
2045 def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
// Writeback counterparts of the "odd" pseudos.
2047 def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2048 def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2049 def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2051 // VST4 : Vector Store (multiple 4-element structures)
// VST4D: base class for the non-writeback vst4 (multiple 4-element
// structures).
//   op11_8, op7_4 - forwarded to NLdSt as the third and fourth arguments.
//   Dt            - data-type string placed after the "vst4" mnemonic.
// Takes an addrmode6 address and four DPR sources, copies Rn{5-4} into
// Inst{5-4}, and decodes through DecodeVLDST4Instruction.
2052 class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
2053 : NLdSt<0, 0b00, op11_8, op7_4, (outs),
2054 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
2055 IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
2056 "", []>, Sched<[WriteVST4]> {
2058 let Inst{5-4} = Rn{5-4};
2059 let DecoderMethod = "DecodeVLDST4Instruction";
// vst4 instantiations with op11_8 = 0b0000 for 8/16/32-bit elements.
2062 def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">;
2063 def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
2064 def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;
// Matching pseudo-instructions, built on VSTQQPseudo.
2066 def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2067 def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2068 def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2070 // ...with address register writeback:
// VST4DWB: writeback variant of vst4.
//   op11_8, op7_4 - forwarded to NLdSt as the third and fourth arguments.
//   Dt            - data-type string placed after the "vst4" mnemonic.
// Adds a GPR:$wb output constrained to "$Rn.addr = $wb" and an
// am6offset:$Rm input printed directly after $Rn. Copies Rn{5-4} into
// Inst{5-4} and decodes through DecodeVLDST4Instruction.
2071 class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2072 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
2073 (ins addrmode6:$Rn, am6offset:$Rm,
2074 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
2075 "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
2076 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
2077 let Inst{5-4} = Rn{5-4};
2078 let DecoderMethod = "DecodeVLDST4Instruction";
// Writeback vst4 instantiations with op11_8 = 0b0000.
2081 def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">;
2082 def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
2083 def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
// Matching writeback pseudo-instructions, built on VSTQQWBPseudo.
2085 def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2086 def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2087 def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2089 // ...with double-spaced registers:
// Double-spaced vst4 forms: same op7_4 values as the d forms but with
// op11_8 = 0b0001, in plain and writeback (_UPD) flavors.
2090 def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">;
2091 def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
2092 def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
2093 def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">;
2094 def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
2095 def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
// Writeback pseudo-instructions over QQQQPR sources.
2097 def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2098 def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2099 def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2101 // ...alternate versions to be allocated odd register numbers:
// "odd" vst4 pseudo-instructions over QQQQPR sources (non-writeback).
2102 def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2103 def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2104 def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
// Writeback counterparts of the "odd" pseudos.
2106 def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2107 def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2108 def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2110 } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
2112 // Classes for VST*LN pseudo-instructions with multi-register operands.
2113 // These are expanded to real instructions after register allocation.
// Lane-store pseudo over a QPR source: addrmode6 address plus a
// nohash_imm:$lane operand; no outputs.
2114 class VSTQLNPseudo<InstrItinClass itin>
2115 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
// Writeback form: adds am6offset:$offset and a GPR:$wb output constrained to
// $addr.addr.
2117 class VSTQLNWBPseudo<InstrItinClass itin>
2118 : PseudoNLdSt<(outs GPR:$wb),
2119 (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
2120 nohash_imm:$lane), itin, "$addr.addr = $wb">;
// Same pair of classes over a QQPR source.
2121 class VSTQQLNPseudo<InstrItinClass itin>
2122 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
2124 class VSTQQLNWBPseudo<InstrItinClass itin>
2125 : PseudoNLdSt<(outs GPR:$wb),
2126 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
2127 nohash_imm:$lane), itin, "$addr.addr = $wb">;
// Same pair of classes over a QQQQPR source.
2128 class VSTQQQQLNPseudo<InstrItinClass itin>
2129 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
2131 class VSTQQQQLNWBPseudo<InstrItinClass itin>
2132 : PseudoNLdSt<(outs GPR:$wb),
2133 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
2134 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2136 // VST1LN : Vector Store (single element from one lane)
// VST1LN: non-writeback vst1 of one lane of a DPR register.
//   op11_8, op7_4 - forwarded to NLdStLn as the third and fourth arguments.
//   Dt            - data-type string placed after the "vst1" mnemonic.
//   Ty            - vector value type of $Vd used in the selection pattern.
//   StoreOp       - store fragment applied to the extracted element.
//   ExtractOp     - node that extracts lane $lane from $Vd.
//   AddrMode      - operand class used for the $Rn address.
// The selection pattern matches StoreOp(ExtractOp(Vd, lane), Rn). Decodes
// through DecodeVST1LN.
2137 class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
2138 PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
2139 : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2140 (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
2141 IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
2142 [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>,
2143 Sched<[WriteVST1]> {
2145 let DecoderMethod = "DecodeVST1LN";
// VST1QLNPseudo: lane-store pseudo over a QPR source (VSTQLNPseudo with the
// IIC_VST1ln itinerary and WriteVST1) whose Pattern stores the element that
// ExtractOp takes from lane $lane of $src.
2147 class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
2148 : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> {
2149 let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
// 8-bit lane store: 3-bit lane index goes into Inst{7-5}.
2153 def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
2154 NEONvgetlaneu, addrmode6> {
2155 let Inst{7-5} = lane{2-0};
// 16-bit lane store: 2-bit lane index goes into Inst{7-6}; Inst{4} comes
// from Rn{4}.
2157 def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
2158 NEONvgetlaneu, addrmode6> {
2159 let Inst{7-6} = lane{1-0};
2160 let Inst{4} = Rn{4};
// 32-bit lane store: 1-bit lane index goes into Inst{7}; Inst{5-4} comes
// from Rn{5-4}.
2163 def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
2165 let Inst{7} = lane{0};
2166 let Inst{5-4} = Rn{5-4};
// QPR-source pseudo-instructions for the same three element sizes.
2169 def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
2170 def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
2171 def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
// Stores of a single f32 lane reuse the 32-bit integer lane-store
// instructions: v2f32 selects VST1LNd32 and v4f32 selects VST1LNq32Pseudo.
2173 def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
2174 (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
2175 def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
2176 (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
2178 // ...with address register writeback:
// VST1LNWB: writeback variant of the single-lane vst1.
//   op11_8, op7_4 - forwarded to NLdStLn as the third and fourth arguments.
//   Dt            - data-type string placed after the "vst1" mnemonic.
//   Ty            - vector value type of $Vd used in the selection pattern.
//   StoreOp       - post-indexed store fragment producing the updated address.
//   ExtractOp     - node that extracts lane $lane from $Vd.
//   AdrMode       - operand class used for the $Rn address (spelled
//                   "AdrMode" here, unlike "AddrMode" in VST1LN).
// The pattern sets GPR:$wb from StoreOp(ExtractOp(Vd, lane), Rn, Rm). Decodes
// through DecodeVST1LN.
2179 class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
2180 PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
2181 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2182 (ins AdrMode:$Rn, am6offset:$Rm,
2183 DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
2184 "\\{$Vd[$lane]\\}, $Rn$Rm",
2186 [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
2187 AdrMode:$Rn, am6offset:$Rm))]>,
2188 Sched<[WriteVST1]> {
2189 let DecoderMethod = "DecodeVST1LN";
// VST1QLNWBPseudo: writeback lane-store pseudo over a QPR source
// (VSTQLNWBPseudo with IIC_VST1lnu and WriteVST1). Its Pattern sets GPR:$wb
// from StoreOp applied to the extracted lane, the address and the offset.
2191 class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
2192 : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> {
2193 let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
2194 addrmode6:$addr, am6offset:$offset))];
// 8-bit writeback lane store: lane index goes into Inst{7-5}.
2197 def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
2198 NEONvgetlaneu, addrmode6> {
2199 let Inst{7-5} = lane{2-0};
// 16-bit writeback lane store: lane index goes into Inst{7-6}; Inst{4} comes
// from Rn{4}.
2201 def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
2202 NEONvgetlaneu, addrmode6> {
2203 let Inst{7-6} = lane{1-0};
2204 let Inst{4} = Rn{4};
// 32-bit writeback lane store: uses addrmode6oneL32; lane index goes into
// Inst{7} and Inst{5-4} comes from Rn{5-4}.
2206 def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
2207 extractelt, addrmode6oneL32> {
2208 let Inst{7} = lane{0};
2209 let Inst{5-4} = Rn{5-4};
// QPR-source writeback pseudo-instructions for the same three element sizes.
2212 def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
2213 def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
2214 def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
2216 let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
2218 // VST2LN : Vector Store (single 2-element structure from one lane)
// VST2LN: non-writeback vst2 of one lane from two DPR registers.
//   op11_8, op7_4 - forwarded to NLdStLn as the third and fourth arguments.
//   Dt            - data-type string placed after the "vst2" mnemonic.
// No selection pattern is attached. Copies Rn{4} into Inst{4} and decodes
// through DecodeVST2LN.
2219 class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2220 : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2221 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
2222 IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
2223 "", []>, Sched<[WriteVST1]> {
2225 let Inst{4} = Rn{4};
2226 let DecoderMethod = "DecodeVST2LN";
// vst2 lane stores for 8/16/32-bit elements; the lane index occupies
// Inst{7-5}, Inst{7-6} and Inst{7} respectively.
2229 def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
2230 let Inst{7-5} = lane{2-0};
2232 def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
2233 let Inst{7-6} = lane{1-0};
2235 def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
2236 let Inst{7} = lane{0};
// Matching pseudo-instructions over a QPR source.
2239 def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2240 def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2241 def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2243 // ...with double-spaced registers:
// Double-spaced vst2 lane stores: same op11_8 as the d16/d32 forms with one
// additional op7_4 element set to 1. The "let Inst{4} = Rn{4}" lines repeat
// the assignment already made in the VST2LN class.
2244 def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
2245 let Inst{7-6} = lane{1-0};
2246 let Inst{4} = Rn{4};
2248 def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
2249 let Inst{7} = lane{0};
2250 let Inst{4} = Rn{4};
// Matching pseudo-instructions over a QQPR source.
2253 def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2254 def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2256 // ...with address register writeback:
// VST2LNWB: writeback variant of the vst2 lane store.
//   op11_8, op7_4 - forwarded to NLdStLn as the third and fourth arguments.
//   Dt            - data-type string placed after the "vst2" mnemonic.
// Adds a GPR:$wb output constrained to "$Rn.addr = $wb" and an
// am6offset:$Rm input printed directly after $Rn. Copies Rn{4} into Inst{4}
// and decodes through DecodeVST2LN.
// NOTE(review): unlike VST2LN, no Sched<> is attached to this class - confirm
// whether that is intentional.
2257 class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2258 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2259 (ins addrmode6:$Rn, am6offset:$Rm,
2260 DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
2261 "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
2262 "$Rn.addr = $wb", []> {
2263 let Inst{4} = Rn{4};
2264 let DecoderMethod = "DecodeVST2LN";
// Writeback vst2 lane stores for 8/16/32-bit elements; the lane index
// occupies Inst{7-5}, Inst{7-6} and Inst{7} respectively.
2267 def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
2268 let Inst{7-5} = lane{2-0};
2270 def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
2271 let Inst{7-6} = lane{1-0};
2273 def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
2274 let Inst{7} = lane{0};
// Matching writeback pseudo-instructions over a QPR source.
2277 def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2278 def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2279 def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
// Double-spaced writeback forms (one additional op7_4 element set to 1).
2281 def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
2282 let Inst{7-6} = lane{1-0};
2284 def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
2285 let Inst{7} = lane{0};
// Matching writeback pseudo-instructions over a QQPR source.
2288 def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2289 def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2291 // VST3LN : Vector Store (single 3-element structure from one lane)
2292 class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2293 : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2294 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
2295 nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
2296 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
2297 Sched<[WriteVST2]> {
2299 let DecoderMethod = "DecodeVST3LN";
2302 def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
2303 let Inst{7-5} = lane{2-0};
2305 def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
2306 let Inst{7-6} = lane{1-0};
2308 def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
2309 let Inst{7} = lane{0};
2312 def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2313 def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2314 def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2316 // ...with double-spaced registers:
2317 def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
2318 let Inst{7-6} = lane{1-0};
2320 def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
2321 let Inst{7} = lane{0};
// Pseudos for the double-spaced VST3LN forms. They carry the same
// WriteVST2 scheduling annotation as every other VST3LN pseudo in this
// section (the d-register pseudos and all of the _UPD pseudos); without it
// these two records were the only VST3LN/VST4LN pseudos with no Sched<> info.
def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2327 // ...with address register writeback:
2328 class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2329 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2330 (ins addrmode6:$Rn, am6offset:$Rm,
2331 DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
2332 IIC_VST3lnu, "vst3", Dt,
2333 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
2334 "$Rn.addr = $wb", []> {
2335 let DecoderMethod = "DecodeVST3LN";
2338 def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
2339 let Inst{7-5} = lane{2-0};
2341 def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
2342 let Inst{7-6} = lane{1-0};
2344 def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
2345 let Inst{7} = lane{0};
2348 def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2349 def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2350 def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2352 def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
2353 let Inst{7-6} = lane{1-0};
2355 def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
2356 let Inst{7} = lane{0};
2359 def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2360 def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2362 // VST4LN : Vector Store (single 4-element structure from one lane)
2363 class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2364 : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2365 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
2366 nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
2367 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
2368 "", []>, Sched<[WriteVST2]> {
2370 let Inst{4} = Rn{4};
2371 let DecoderMethod = "DecodeVST4LN";
2374 def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
2375 let Inst{7-5} = lane{2-0};
2377 def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
2378 let Inst{7-6} = lane{1-0};
2380 def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
2381 let Inst{7} = lane{0};
2382 let Inst{5} = Rn{5};
2385 def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2386 def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2387 def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2389 // ...with double-spaced registers:
2390 def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
2391 let Inst{7-6} = lane{1-0};
2393 def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
2394 let Inst{7} = lane{0};
2395 let Inst{5} = Rn{5};
2398 def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2399 def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2401 // ...with address register writeback:
2402 class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2403 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2404 (ins addrmode6:$Rn, am6offset:$Rm,
2405 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
2406 IIC_VST4lnu, "vst4", Dt,
2407 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
2408 "$Rn.addr = $wb", []> {
2409 let Inst{4} = Rn{4};
2410 let DecoderMethod = "DecodeVST4LN";
2413 def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
2414 let Inst{7-5} = lane{2-0};
2416 def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
2417 let Inst{7-6} = lane{1-0};
2419 def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
2420 let Inst{7} = lane{0};
2421 let Inst{5} = Rn{5};
2424 def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2425 def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2426 def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2428 def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
2429 let Inst{7-6} = lane{1-0};
2431 def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
2432 let Inst{7} = lane{0};
2433 let Inst{5} = Rn{5};
2436 def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2437 def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2439 } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
2441 // Use vld1/vst1 for unaligned f64 load / store
// Little-endian only: the hword_aligned* fragments select the 16-bit element
// VLD1/VST1 forms and the byte_aligned* fragments select the 8-bit forms.
let Predicates = [IsLE] in {
def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
          (VLD1d16 addrmode6:$addr)>;
def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d16 addrmode6:$addr, DPR:$value)>;
def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
          (VLD1d8 addrmode6:$addr)>;
def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d8 addrmode6:$addr, DPR:$value)>;
}

// Big-endian only: any non_word_aligned* f64 access selects the 64-bit
// element VLD1/VST1 forms.
let Predicates = [IsBE] in {
def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
          (VLD1d64 addrmode6:$addr)>;
def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d64 addrmode6:$addr, DPR:$value)>;
}
2455 // Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
2456 // load / store if it's legal.
// dword-aligned v2f64 accesses use the 64-bit element Q-register forms and
// carry no endianness predicate.
def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
          (VLD1q64 addrmode6:$addr)>;
def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q64 addrmode6:$addr, QPR:$value)>;

// Little-endian only: word/hword/byte aligned v2f64 accesses select the
// 32-, 16- and 8-bit element Q-register forms respectively.
let Predicates = [IsLE] in {
def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
          (VLD1q32 addrmode6:$addr)>;
def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q32 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
          (VLD1q16 addrmode6:$addr)>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q16 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
          (VLD1q8 addrmode6:$addr)>;
def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q8 addrmode6:$addr, QPR:$value)>;
}
2474 //===----------------------------------------------------------------------===//
2475 // NEON pattern fragments
2476 //===----------------------------------------------------------------------===//
2478 // Extract D sub-registers of Q registers.
2479 def DSubReg_i8_reg : SDNodeXForm<imm, [{
2480 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2481 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, SDLoc(N),
2484 def DSubReg_i16_reg : SDNodeXForm<imm, [{
2485 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2486 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, SDLoc(N),
2489 def DSubReg_i32_reg : SDNodeXForm<imm, [{
2490 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2491 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, SDLoc(N),
2494 def DSubReg_f64_reg : SDNodeXForm<imm, [{
2495 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2496 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), SDLoc(N),
2500 // Extract S sub-registers of Q/D registers.
2501 def SSubReg_f32_reg : SDNodeXForm<imm, [{
2502 assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
2503 return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), SDLoc(N),
2507 // Translate lane numbers from Q registers to D subregs.
2508 def SubReg_i8_lane : SDNodeXForm<imm, [{
2509 return CurDAG->getTargetConstant(N->getZExtValue() & 7, SDLoc(N), MVT::i32);
2511 def SubReg_i16_lane : SDNodeXForm<imm, [{
2512 return CurDAG->getTargetConstant(N->getZExtValue() & 3, SDLoc(N), MVT::i32);
2514 def SubReg_i32_lane : SDNodeXForm<imm, [{
2515 return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i32);
2518 //===----------------------------------------------------------------------===//
2519 // Instruction Classes
2520 //===----------------------------------------------------------------------===//
2522 // Basic 2-register operations: double- and quad-register.
// N2VD: D-register form. One DPR source ($Vm), one DPR result ($Vd), selected
// for (ResTy (OpNode (OpTy $Vm))). The itinerary is fixed to IIC_VUNAD, and
// the sixth N2V argument is the constant 0 (N2VQ passes 1 there).
class N2VD<bits<2> op24_23, bits<2> op21_20,
           bits<2> op19_18, bits<2> op17_16,
           bits<5> op11_7, bit op4,
           string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd,
              (ResTy (OpNode (OpTy DPR:$Vm))))]>;
// N2VQ: Q-register form. One QPR source ($Vm), one QPR result ($Vd), selected
// for (ResTy (OpNode (OpTy $Vm))). The itinerary is fixed to IIC_VUNAQ, and
// the sixth N2V argument is the constant 1 (N2VD passes 0 there).
class N2VQ<bits<2> op24_23, bits<2> op21_20,
           bits<2> op19_18, bits<2> op17_16,
           bits<5> op11_7, bit op4,
           string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd,
              (ResTy (OpNode (OpTy QPR:$Vm))))]>;
2536 // Basic 2-register intrinsics, both double- and quad-register.
// N2VDInt: D-register form of a 2-register intrinsic. Same operand shape as
// N2VD (DPR $Vm in, DPR $Vd out), but the itinerary is a parameter and the
// selected operator is an SDPatternOperator (IntOp).
class N2VDInt<bits<2> op24_23, bits<2> op21_20,
              bits<2> op19_18, bits<2> op17_16,
              bits<5> op11_7, bit op4,
              InstrItinClass itin,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd,
              (ResTy (IntOp (OpTy DPR:$Vm))))]>;
// N2VQInt: Q-register form of a 2-register intrinsic. QPR $Vm in, QPR $Vd
// out; itinerary and SDPatternOperator (IntOp) are supplied by the user of
// the class. Passes 1 as the sixth N2V argument (N2VDInt passes 0).
class N2VQInt<bits<2> op24_23, bits<2> op21_20,
              bits<2> op19_18, bits<2> op17_16,
              bits<5> op11_7, bit op4,
              InstrItinClass itin,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd,
              (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2552 // Same as above, but not predicated.
// N2VDIntnp: D-register 2-register intrinsic built on N2Vnp instead of N2V.
// DPR $Vm in, DPR $Vd out; the fifth N2Vnp argument is the constant 0
// (N2VQIntnp passes 1 there). No asm operand or constraint strings are
// passed at this level.
class N2VDIntnp<bits<2> op19_18, bits<2> op17_16,
                bits<3> op10_8, bit op7,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, 0,
          (outs DPR:$Vd), (ins DPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set DPR:$Vd,
                (ResTy (IntOp (OpTy DPR:$Vm))))]>;
// N2VQIntnp: Q-register 2-register intrinsic built on N2Vnp. QPR $Vm in,
// QPR $Vd out; the fifth N2Vnp argument is the constant 1 (N2VDIntnp passes
// 0 there).
class N2VQIntnp<bits<2> op19_18, bits<2> op17_16,
                bits<3> op10_8, bit op7,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, 1,
          (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd,
                (ResTy (IntOp (OpTy QPR:$Vm))))]>;
// Similar to N2VQIntnp with some more encoding bits exposed (crypto).
// N2VQIntXnp: like N2VQIntnp, but the fifth N2Vnp argument is the op6
// template parameter rather than a constant. Note that the template
// parameters are declared op6 then op7, while N2Vnp receives op7 then op6.
class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16,
                 bits<3> op10_8, bit op6, bit op7,
                 InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
          (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd,
                (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2575 // Same as N2VQIntXnp but with Vd as a src register.
2576 class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
2577 bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
2578 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2579 : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
2580 (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
2581 itin, OpcodeStr, Dt,
2582 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
2583 let Constraints = "$src = $Vd";
2586 // Narrow 2-register operations.
// N2VN: narrowing form. Reads a QPR source ($Vm, typed TyQ) and writes a DPR
// result ($Vd, typed TyD); selected for (TyD (OpNode (TyQ $Vm))). op6 is a
// template parameter here rather than a fixed D/Q constant.
class N2VN<bits<2> op24_23, bits<2> op21_20,
           bits<2> op19_18, bits<2> op17_16,
           bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin,
           string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$Vd), (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd,
              (TyD (OpNode (TyQ QPR:$Vm))))]>;
2595 // Narrow 2-register intrinsics.
// N2VNInt: narrowing intrinsic form. QPR source ($Vm, TyQ) to DPR result
// ($Vd, TyD); identical in shape to N2VN except that the operator is an
// SDPatternOperator (IntOp).
class N2VNInt<bits<2> op24_23, bits<2> op21_20,
              bits<2> op19_18, bits<2> op17_16,
              bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin,
              string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$Vd), (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd,
              (TyD (IntOp (TyQ QPR:$Vm))))]>;
2604 // Long 2-register operations (currently only used for VMOVL).
// N2VL: lengthening form. Reads a DPR source ($Vm, typed TyD) and writes a
// QPR result ($Vd, typed TyQ); selected for (TyQ (OpNode (TyD $Vm))).
class N2VL<bits<2> op24_23, bits<2> op21_20,
           bits<2> op19_18, bits<2> op17_16,
           bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin,
           string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs QPR:$Vd), (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd,
              (TyQ (OpNode (TyD DPR:$Vm))))]>;
2613 // Long 2-register intrinsics.
// N2VLInt: lengthening intrinsic form. DPR source ($Vm, TyD) to QPR result
// ($Vd, TyQ); identical in shape to N2VL except that the operator is an
// SDPatternOperator (IntOp).
class N2VLInt<bits<2> op24_23, bits<2> op21_20,
              bits<2> op19_18, bits<2> op17_16,
              bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin,
              string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs QPR:$Vd), (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd,
              (TyQ (IntOp (TyD DPR:$Vm))))]>;
2622 // 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// N2VDShuffle: D-register shuffle. Both $Vd and $Vm are outputs, each tied to
// one of the two inputs ($src1 = $Vd, $src2 = $Vm). The itinerary is fixed to
// IIC_VPERMD and no selection pattern is attached (empty pattern list).
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7,
                  string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0,
        (outs DPR:$Vd, DPR:$Vm),
        (ins DPR:$src1, DPR:$src2),
        IIC_VPERMD, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm",
        []>;
// N2VQShuffle: Q-register shuffle. Both $Vd and $Vm are outputs, each tied to
// one of the two inputs ($src1 = $Vd, $src2 = $Vm). Unlike N2VDShuffle the
// itinerary is a template parameter. No selection pattern is attached.
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0,
        (outs QPR:$Vd, QPR:$Vm),
        (ins QPR:$src1, QPR:$src2),
        itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm",
        []>;
2634 // Basic 3-register operations: double- and quad-register.
2635 class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2636 InstrItinClass itin, string OpcodeStr, string Dt,
2637 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2638 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2639 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2640 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2641 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
2642 // All of these have a two-operand InstAlias.
2643 let TwoOperandAliasConstraint = "$Vn = $Vd";
2644 let isCommutable = Commutable;
2646 // Same as N3VD but no data type.
2647 class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2648 InstrItinClass itin, string OpcodeStr,
2649 ValueType ResTy, ValueType OpTy,
2650 SDNode OpNode, bit Commutable>
2651 : N3VX<op24, op23, op21_20, op11_8, 0, op4,
2652 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2653 OpcodeStr, "$Vd, $Vn, $Vm", "",
2654 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
2655 // All of these have a two-operand InstAlias.
2656 let TwoOperandAliasConstraint = "$Vn = $Vd";
2657 let isCommutable = Commutable;
2660 class N3VDSL<bits<2> op21_20, bits<4> op11_8,
2661 InstrItinClass itin, string OpcodeStr, string Dt,
2662 ValueType Ty, SDNode ShOp>
2663 : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2664 (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2665 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2667 (Ty (ShOp (Ty DPR:$Vn),
2668 (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
2669 // All of these have a two-operand InstAlias.
2670 let TwoOperandAliasConstraint = "$Vn = $Vd";
2671 let isCommutable = 0;
2673 class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
2674 string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
2675 : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2676 (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2677 NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
2679 (Ty (ShOp (Ty DPR:$Vn),
2680 (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
2681 // All of these have a two-operand InstAlias.
2682 let TwoOperandAliasConstraint = "$Vn = $Vd";
2683 let isCommutable = 0;
2686 class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2687 InstrItinClass itin, string OpcodeStr, string Dt,
2688 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2689 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2690 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2691 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2692 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
2693 // All of these have a two-operand InstAlias.
2694 let TwoOperandAliasConstraint = "$Vn = $Vd";
2695 let isCommutable = Commutable;
2697 class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2698 InstrItinClass itin, string OpcodeStr,
2699 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2700 : N3VX<op24, op23, op21_20, op11_8, 1, op4,
2701 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2702 OpcodeStr, "$Vd, $Vn, $Vm", "",
2703 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
2704 // All of these have a two-operand InstAlias.
2705 let TwoOperandAliasConstraint = "$Vn = $Vd";
2706 let isCommutable = Commutable;
2708 class N3VQSL<bits<2> op21_20, bits<4> op11_8,
2709 InstrItinClass itin, string OpcodeStr, string Dt,
2710 ValueType ResTy, ValueType OpTy, SDNode ShOp>
2711 : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2712 (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2713 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2714 [(set (ResTy QPR:$Vd),
2715 (ResTy (ShOp (ResTy QPR:$Vn),
2716 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
2718 // All of these have a two-operand InstAlias.
2719 let TwoOperandAliasConstraint = "$Vn = $Vd";
2720 let isCommutable = 0;
2722 class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
2723 ValueType ResTy, ValueType OpTy, SDNode ShOp>
2724 : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2725 (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2726 NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
2727 [(set (ResTy QPR:$Vd),
2728 (ResTy (ShOp (ResTy QPR:$Vn),
2729 (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
2731 // All of these have a two-operand InstAlias.
2732 let TwoOperandAliasConstraint = "$Vn = $Vd";
2733 let isCommutable = 0;
2736 // Basic 3-register intrinsics, both double- and quad-register.
2737 class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2738 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2739 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
2740 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2741 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
2742 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2743 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
2744 // All of these have a two-operand InstAlias.
2745 let TwoOperandAliasConstraint = "$Vn = $Vd";
2746 let isCommutable = Commutable;
// N3VDIntnp: D-register 3-register intrinsic built on N3Vnp. Two DPR sources
// ($Vn, $Vm), one DPR result ($Vd); selected for
// (ResTy (IntOp (OpTy $Vn), (OpTy $Vm))).
//
// The instruction format is taken from the `f` template parameter, matching
// N3VQIntnp (previously `f` was accepted but N3RegFrm was hard-coded), and
// the `Commutable` parameter is applied to isCommutable in the same way the
// predicated N3VD/N3VDInt classes do (previously it was silently dropped).
class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
2757 class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2758 string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
2759 : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2760 (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2761 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2763 (Ty (IntOp (Ty DPR:$Vn),
2764 (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
2766 let isCommutable = 0;
2769 class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2770 string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
2771 : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2772 (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2773 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2775 (Ty (IntOp (Ty DPR:$Vn),
2776 (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
2777 let isCommutable = 0;
2779 class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2780 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2781 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2782 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2783 (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
2784 OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
2785 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
2786 let TwoOperandAliasConstraint = "$Vm = $Vd";
2787 let isCommutable = 0;
2790 class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2791 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2792 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
2793 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2794 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
2795 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2796 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
2797 // All of these have a two-operand InstAlias.
2798 let TwoOperandAliasConstraint = "$Vn = $Vd";
2799 let isCommutable = Commutable;
// N3VQIntnp: Q-register 3-register intrinsic built on N3Vnp. Two QPR sources
// ($Vn, $Vm), one QPR result ($Vd); selected for
// (ResTy (IntOp (OpTy $Vn), (OpTy $Vm))).
//
// The `Commutable` parameter is applied to isCommutable in the same way the
// predicated N3VQ/N3VQInt classes do (previously it was accepted but never
// used, so instantiations passing 1 were not marked commutable).
class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
}
2810 // Same as N3VQIntnp but with Vd as a src register.
2811 class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
2812 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
2813 string Dt, ValueType ResTy, ValueType OpTy,
2814 SDPatternOperator IntOp, bit Commutable>
2815 : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
2816 (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
2817 f, itin, OpcodeStr, Dt,
2818 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
2819 (OpTy QPR:$Vm))))]> {
2820 let Constraints = "$src = $Vd";
2823 class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2824 string OpcodeStr, string Dt,
2825 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2826 : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2827 (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2828 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2829 [(set (ResTy QPR:$Vd),
2830 (ResTy (IntOp (ResTy QPR:$Vn),
2831 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
2833 let isCommutable = 0;
2835 class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2836 string OpcodeStr, string Dt,
2837 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2838 : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2839 (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2840 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2841 [(set (ResTy QPR:$Vd),
2842 (ResTy (IntOp (ResTy QPR:$Vn),
2843 (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
2845 let isCommutable = 0;
2847 class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2848 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2849 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2850 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2851 (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
2852 OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
2853 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
2854 let TwoOperandAliasConstraint = "$Vm = $Vd";
2855 let isCommutable = 0;
2858 // Multiply-Add/Sub operations: double- and quad-register.
// N3VDMulOp: D-register multiply-accumulate shape. $src1 is an extra DPR
// input tied to the result ($src1 = $Vd). Selected for
// (Ty (OpNode $src1, (Ty (MulOp $Vn, $Vm)))).
class N3VDMulOp<bit op24, bit op23,
                bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd,
              (Ty (OpNode DPR:$src1,
                          (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
2868 class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2869 string OpcodeStr, string Dt,
2870 ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
2871 : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2873 (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2875 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2877 (Ty (ShOp (Ty DPR:$src1),
2879 (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
2881 class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2882 string OpcodeStr, string Dt,
2883 ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
2884 : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2886 (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2888 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2890 (Ty (ShOp (Ty DPR:$src1),
2892 (Ty (NEONvduplane (Ty DPR_8:$Vm),
// N3VQMulOp: Q-register multiply-accumulate shape. $src1 is an extra QPR
// input tied to the result ($src1 = $Vd). Selected for
// (Ty (OpNode $src1, (Ty (MulOp $Vn, $Vm)))).
class N3VQMulOp<bit op24, bit op23,
                bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (Ty (OpNode QPR:$src1,
                          (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
2903 class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2904 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
2905 SDPatternOperator MulOp, SDPatternOperator ShOp>
2906 : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2908 (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2910 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2911 [(set (ResTy QPR:$Vd),
2912 (ResTy (ShOp (ResTy QPR:$src1),
2913 (ResTy (MulOp QPR:$Vn,
2914 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
2916 class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2917 string OpcodeStr, string Dt,
2918 ValueType ResTy, ValueType OpTy,
2919 SDPatternOperator MulOp, SDPatternOperator ShOp>
2920 : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2922 (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2924 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2925 [(set (ResTy QPR:$Vd),
2926 (ResTy (ShOp (ResTy QPR:$src1),
2927 (ResTy (MulOp QPR:$Vn,
2928 (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
2931 // Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// N3VDIntOp: D-register "intrinsic then op" shape. $src1 is a DPR input tied
// to the result ($src1 = $Vd). Selected for
// (Ty (OpNode $src1, (Ty (IntOp (Ty $Vn), (Ty $Vm))))).
class N3VDIntOp<bit op24, bit op23,
                bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType Ty,
                SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd,
              (Ty (OpNode DPR:$src1,
                          (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
// N3VQIntOp: Q-register "intrinsic then op" shape. $src1 is a QPR input tied
// to the result ($src1 = $Vd). Selected for
// (Ty (OpNode $src1, (Ty (IntOp (Ty $Vn), (Ty $Vm))))).
class N3VQIntOp<bit op24, bit op23,
                bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType Ty,
                SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (Ty (OpNode QPR:$src1,
                          (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
2949 // Neon 3-argument intrinsics, both double- and quad-register.
2950 // The destination register is also used as the first source operand register.
// N3VDInt3: D-register 3-argument intrinsic. All three operands of IntOp are
// typed OpTy; the first one ($src1) is tied to the result ($src1 = $Vd).
// Selected for (ResTy (IntOp $src1, $Vn, $Vm)).
class N3VDInt3<bit op24, bit op23,
               bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin,
               string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy,
               SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd,
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy DPR:$Vn),
                            (OpTy DPR:$Vm))))]>;
// N3VQInt3: Q-register 3-argument intrinsic. All three operands of IntOp are
// typed OpTy; the first one ($src1) is tied to the result ($src1 = $Vd).
// Selected for (ResTy (IntOp $src1, $Vn, $Vm)).
class N3VQInt3<bit op24, bit op23,
               bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin,
               string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy,
               SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (ResTy (IntOp (OpTy QPR:$src1),
                            (OpTy QPR:$Vn),
                            (OpTy QPR:$Vm))))]>;
2968 // Long Multiply-Add/Sub operations.
// N3VLMulOp: long multiply-accumulate shape. The accumulator/result is a QPR
// typed TyQ ($src1 tied to $Vd); the two multiplicands are DPRs typed TyD.
// Selected for (OpNode (TyQ $src1), (TyQ (MulOp (TyD $Vn), (TyD $Vm)))).
class N3VLMulOp<bit op24, bit op23,
                bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD,
                SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$src1),
                      (TyQ (MulOp (TyD DPR:$Vn),
                                  (TyD DPR:$Vm)))))]>;
2978 class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
2979 InstrItinClass itin, string OpcodeStr, string Dt,
2980 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2981 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
2982 (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2984 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2986 (OpNode (TyQ QPR:$src1),
2987 (TyQ (MulOp (TyD DPR:$Vn),
2988 (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
// Long multiply-accumulate by a 16-bit scalar lane.
// $Vm is restricted to DPR_8 and indexed with VectorIndex16; the selected
// lane is broadcast with NEONvduplane before MulOp.  $src1 is tied to $Vd.
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                    (OpNode (TyQ QPR:$src1),
                            (TyQ (MulOp (TyD DPR:$Vn),
                                        (TyD (NEONvduplane
                                                (TyD DPR_8:$Vm),
                                                imm:$lane))))))]>;
// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// IntOp runs on the two D-register operands, ExtOp widens its TyD result to
// TyQ, and OpNode folds that into the accumulator $src1 (tied to $Vd).
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
                   bit op4,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD,
                   SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$src1),
                      (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                              (TyD DPR:$Vm)))))))]>;
// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register
// (expressed by the "$src1 = $Vd" constraint).
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (TyQ (IntOp (TyQ QPR:$src1),
                          (TyD DPR:$Vn),
                          (TyD DPR:$Vm))))]>;
// Long 3-argument intrinsic by a 32-bit scalar lane.
// The third IntOp operand is lane $lane of $Vm (a DPR_VFP2 register)
// broadcast through NEONvduplane; $src1 is tied to $Vd.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]>;
// Long 3-argument intrinsic by a 16-bit scalar lane.
// The third IntOp operand is lane $lane of $Vm (a DPR_8 register)
// broadcast through NEONvduplane; $src1 is tied to $Vd.
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]>;
// Narrowing 3-register intrinsics.
// Two Q-register sources of type TyQ produce a D-register result of type TyD.
// The itinerary is fixed to IIC_VBINi4D; Commutable sets isCommutable.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm),
        N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd,
              (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// Long 3-register operations.
// Two D-register sources of type TyD are combined by OpNode into a
// Q-register result of type TyQ.  Commutable sets isCommutable.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// Long 3-register operation by a 32-bit scalar lane.
// The second OpNode operand is lane $lane of $Vm (a DPR_VFP2 register)
// broadcast through NEONvduplane.
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                    (TyQ (OpNode (TyD DPR:$Vn),
                                 (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                                    imm:$lane)))))]>;
// Long 3-register operation by a 16-bit scalar lane.
// The second OpNode operand is lane $lane of $Vm (a DPR_8 register)
// broadcast through NEONvduplane.
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                    (TyQ (OpNode (TyD DPR:$Vn),
                                 (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                                    imm:$lane)))))]>;
// Long 3-register operations with explicitly extended operands.
// Each D-register source is widened from TyD to TyQ with ExtOp before OpNode
// is applied.  Commutable sets isCommutable.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD,
              SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                      (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}
// Long 3-register intrinsics with explicit extend (VABDL).
// IntOp is evaluated on the TyD sources and its TyD result is then widened
// to TyQ with ExtOp.  Commutable sets isCommutable.
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD,
                 SDPatternOperator IntOp, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                      (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}
// Long 3-register intrinsics.
// IntOp maps two TyD D-register sources directly to a TyQ Q-register result.
// Commutable sets isCommutable.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD,
              SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// Long 3-register intrinsics, not predicated (built on N3Vnp instead of N3V).
// IntOp maps two OpTy D-register sources to a ResTy Q-register result.
// Commutable is forwarded to isCommutable, matching the predicated N3VLInt
// class; previously the argument was accepted but silently ignored.
class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// Long 3-register intrinsic by a 32-bit scalar lane.
// The second IntOp operand is lane $lane of $Vm (a DPR_VFP2 register)
// broadcast through NEONvduplane.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]>;
// Long 3-register intrinsic by a 16-bit scalar lane.
// The second IntOp operand is lane $lane of $Vm (a DPR_8 register)
// broadcast through NEONvduplane.
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]>;
// Wide 3-register operations.
// $Vn is already a Q register of type TyQ; only the D-register operand $Vm
// is widened (TyD -> TyQ via ExtOp) before OpNode.  The itinerary is fixed
// to IIC_VSUBiD.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vn, DPR:$Vm),
        N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$Vn),
                      (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Pairwise long 2-register intrinsics, both double- and quad-register.
// Double-register form: one DPR source of type OpTy, DPR result of type
// ResTy, fixed itinerary IIC_VSHLiD.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
// Pairwise long 2-register intrinsic, quad-register form: one QPR source of
// type OpTy, QPR result of type ResTy, fixed itinerary IIC_VSHLiD.
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
// Double-register form: accumulator $src1 (ResTy) is tied to $Vd.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vm),
        IIC_VPALiD,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd,
              (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
// Pairwise long 2-register accumulate intrinsic, quad-register form.
// Accumulator $src1 (ResTy) is tied to $Vd; itinerary is IIC_VPALiQ.
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vm),
        IIC_VPALiQ,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
// Shift by immediate,
// both double- and quad-register.
// Both classes share the two-operand alias constraint "$Vm = $Vd" and match
// OpNode applied to $Vm and an i32 immediate $SIMM of operand class ImmTy.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {

// Double-register form.
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$Vm, ImmTy:$SIMM),
           f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd,
                 (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;

// Quad-register form.
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$Vm, ImmTy:$SIMM),
           f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd,
                 (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;

}
// Long shift by immediate.
// A D-register source of type OpTy is shifted into a Q-register result of
// type ResTy.  The pattern matches the immediate as ImmTy:$SIMM; format and
// itinerary are fixed to N2RegVShLFrm / IIC_VSHLiD.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd),
           (ins DPR:$Vm, ImmTy:$SIMM),
           N2RegVShLFrm, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd,
                 (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;
// Narrow shift by immediate.
// A Q-register source of type OpTy is shifted into a D-register result of
// type ResTy.  The immediate is matched as (i32 ImmTy:$SIMM); the format is
// fixed to N2RegVShRFrm.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd),
           (ins QPR:$Vm, ImmTy:$SIMM),
           N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd,
                 (ResTy (OpNode (OpTy QPR:$Vm), (i32 ImmTy:$SIMM))))]>;
// Shift right by immediate and accumulate,
// both double- and quad-register.
// The pattern is add($src1, ShOp($Vm, $SIMM)) with $src1 tied to $Vd.  Both
// classes use format N2RegVShRFrm and itinerary IIC_VPALiD.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {

// Double-register form.
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM),
           N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd,
                 (Ty (add DPR:$src1,
                          (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;

// Quad-register form.
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM),
           N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd,
                 (Ty (add QPR:$src1,
                          (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;

}
// Shift by immediate and insert,
// both double- and quad-register.
// ShOp receives the tied destination value $src1, the source $Vm and the i32
// immediate $SIMM.  The encoding format is supplied by the caller through f.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {

// Double-register form (itinerary IIC_VSHLiD).
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM),
           f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd,
                 (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;

// Quad-register form (itinerary IIC_VSHLiQ).
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM),
           f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd,
                 (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;

}
// Convert, with fractional bits immediate,
// both double- and quad-register.
// Double-register form: the immediate operand class is neon_vcvt_imm32 and
// the format / itinerary are fixed to NVCVTFrm / IIC_VUNAD.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$Vm, neon_vcvt_imm32:$SIMM),
           NVCVTFrm, IIC_VUNAD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd,
                 (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
// Convert with fractional-bits immediate, quad-register form: the immediate
// operand class is neon_vcvt_imm32 and the format / itinerary are fixed to
// NVCVTFrm / IIC_VUNAQ.
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$Vm, neon_vcvt_imm32:$SIMM),
           NVCVTFrm, IIC_VUNAQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd,
                 (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
3307 //===----------------------------------------------------------------------===//
3309 //===----------------------------------------------------------------------===//
3311 // Abbreviations used in multiclass suffixes:
3312 // Q = quarter int (8 bit) elements
3313 // H = half int (16 bit) elements
3314 // S = single int (32 bit) elements
3315 // D = double int (64 bit) elements
3317 // Neon 2-register vector operations and intrinsics.
// Neon 2-register comparisons.
//   source operand element sizes of 8, 16 and 32 bits:
// Every def applies OpNode to the single source register $Vm with
// NoItinerary.  The f32/f16 defs yield an integer vector with the same lane
// layout and additionally set encoding bit 10 (F); the f16 defs also require
// HasNEON and HasFullFP16.
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8
    : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
          opc, !strconcat(Dt, "8"), asm, "",
          [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
  def v4i16
    : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
          opc, !strconcat(Dt, "16"), asm, "",
          [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
  def v2i32
    : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
          opc, !strconcat(Dt, "32"), asm, "",
          [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
  def v2f32
    : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
          opc, "f32", asm, "",
          [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
  def v4f16
    : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
          opc, "f16", asm, "",
          [(set DPR:$Vd, (v4i16 (OpNode (v4f16 DPR:$Vm))))]>,
      Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8
    : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
          opc, !strconcat(Dt, "8"), asm, "",
          [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
  def v8i16
    : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
          opc, !strconcat(Dt, "16"), asm, "",
          [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
  def v4i32
    : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
          opc, !strconcat(Dt, "32"), asm, "",
          [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
  def v4f32
    : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
          opc, "f32", asm, "",
          [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
  def v8f16
    : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
          opc, "f16", asm, "",
          [(set QPR:$Vd, (v8i16 (OpNode (v8f16 QPR:$Vm))))]>,
      Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}
// Neon 2-register vector intrinsics,
//   element sizes of 8, 16 and 32 bits:
// The third argument (0b00/0b01/0b10) selects the element size; the
// D-register defs use itinD and the Q-register defs use itinQ.
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, itinD,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, itinD,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, itinD,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, itinQ,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, itinQ,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, itinQ,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp>;
}
// Neon Narrowing 2-register vector operations,
//   source operand element sizes of 16, 32 and 64 bits:
// Each def is named after its (narrow) result type, while the data-type
// suffix appended to Dt is the source element width.
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, itin,
                   OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, itin,
                   OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, itin,
                   OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, OpNode>;
}
// Neon Narrowing 2-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
// Each def is named after its (narrow) result type, while the data-type
// suffix appended to Dt is the source element width.
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, itin,
                      OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, itin,
                      OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, itin,
                      OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, IntOp>;
}
// Neon Lengthening 2-register vector operation (currently specific to VMOVL).
//   source operand element sizes of 8, 16 and 32 bits:
// Each def is named after its (wide) result type; the data-type suffix
// appended to Dt is the source element width.  All defs use IIC_VQUNAiD.
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4,
                   IIC_VQUNAiD, OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4,
                   IIC_VQUNAiD, OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4,
                   IIC_VQUNAiD, OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode>;
}
// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
// The 8- and 16-bit defs share the *16 itineraries; the 32-bit defs use the
// *32 itineraries.  Commutable is forwarded to every def.
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}
// 3-register by-scalar-lane operations for 16- and 32-bit elements.
// The data-type strings are hard-coded to "i16"/"i32"; the Q-register defs
// additionally name the D-register type that holds the scalar.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  // 64-bit vector types.
  def v4i16 : N3VDSL16<0b01, op11_8,
                       OpcodeStr, "i16", v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D,
                     OpcodeStr, "i32", v2i32, ShOp>;

  // 128-bit vector types.
  def v8i16 : N3VQSL16<0b01, op11_8,
                       OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q,
                     OpcodeStr, "i32", v4i32, v2i32, ShOp>;
}
// ....then also with element size 64 bits:
// Inherits the 8/16/32-bit defs from N3V_QHS (passing itinD/itinQ for both
// the 16- and 32-bit itinerary slots) and adds the 64-bit element defs.
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4,
            itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}
// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
// Each def gets the itinerary matching its register width and element size;
// the format f and Commutable are forwarded unchanged.
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     SDPatternOperator IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}
// 16- and 32-bit element 3-register intrinsics built on the N3VDIntSh /
// N3VQIntSh classes.  Unlike N3VInt_HS there is no Commutable argument.
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v4i32, IntOp>;
}
// 3-register by-scalar-lane intrinsics for 16- and 32-bit elements.
// The Q-register defs additionally name the D-register type that holds the
// scalar operand.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"),
                          v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"),
                          v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v2i32, IntOp>;
}
// ....then also with element size of 8 bits:
// Inherits the 16/32-bit defs from N3VInt_HS and adds 8-bit element defs,
// which reuse the 16-bit itineraries.
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f,
              itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}
// Extends N3VInt_HSSh with 8-bit element defs, which reuse the 16-bit
// itineraries.
multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f,
                itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v16i8, v16i8, IntOp>;
}
// ....then also with element size of 64 bits:
// Inherits the 8/16/32-bit defs from N3VInt_QHS and adds 64-bit element
// defs, which reuse the 32-bit itineraries.
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f,
               itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}
// Extends N3VInt_QHSSh with 64-bit element defs, which reuse the 32-bit
// itineraries.
multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f,
                 itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v2i64, v2i64, IntOp>;
}
// Neon Narrowing 3-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
// Each def is named after its (narrow) result type, while the data-type
// suffix appended to Dt is the source element width.
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}
// Neon Long 3-register vector operations.
// Each def is named after its (wide) result type; the data-type suffix
// appended to Dt is the source element width.  The 8- and 16-bit source
// forms use itin16 and the 32-bit source form uses itin32.
multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, Commutable>;
}
// Long 3-register by-scalar-lane operations for 16- and 32-bit source
// elements.  Here the defs are named after the source (D-register) type.
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin,
                       OpcodeStr, !strconcat(Dt, "16"),
                       v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v2i64, v2i32, OpNode>;
}
// Long 3-register operations with explicitly extended operands, for source
// element sizes of 8, 16 and 32 bits.  The 8- and 16-bit source forms use
// itin16 and the 32-bit source form uses itin32.
multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}
// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
// Each def is named after its (wide) result type; the data-type suffix
// appended to Dt is the source element width.
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}
// Long 3-register by-scalar-lane intrinsics for 16- and 32-bit source
// elements.  Here the defs are named after the source (D-register) type.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        SDPatternOperator IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"),
                          v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i64, v2i32, IntOp>;
}
// ....then also with element size of 8 bits:
// Inherits the 16/32-bit source defs from N3VLInt_HS and adds the 8-bit
// source def, which reuses itin16.
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4,
               itin16, itin32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}
3722 // ....with explicit extend (VABDL).
// N3VLIntExt_QHS emits three N3VLIntExt defs that all share one itinerary:
//   v8i16 : third argument 0b00, Dt + "8",  type pair v8i16, v8i8
//   v4i32 : third argument 0b01, Dt + "16", type pair v4i32, v4i16
//   v2i64 : third argument 0b10, Dt + "32", type pair v2i64, v2i32
// IntOp, ExtOp and Commutable (default 0) are forwarded unchanged.
3723 multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3724 InstrItinClass itin, string OpcodeStr, string Dt,
3725 SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> {
3726 def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
3727 OpcodeStr, !strconcat(Dt, "8"),
3728 v8i16, v8i8, IntOp, ExtOp, Commutable>;
3729 def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
3730 OpcodeStr, !strconcat(Dt, "16"),
3731 v4i32, v4i16, IntOp, ExtOp, Commutable>;
3732 def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
3733 OpcodeStr, !strconcat(Dt, "32"),
3734 v2i64, v2i32, IntOp, ExtOp, Commutable>;
3738 // Neon Wide 3-register vector intrinsics,
3739 // source operand element sizes of 8, 16 and 32 bits:
// N3VW_QHS takes no itinerary parameter; each def is an N3VW instance:
//   v8i16 : third argument 0b00, Dt + "8",  type pair v8i16, v8i8
//   v4i32 : third argument 0b01, Dt + "16", type pair v4i32, v4i16
//   v2i64 : third argument 0b10, Dt + "32", type pair v2i64, v2i32
// OpNode, ExtOp and Commutable (default 0) are forwarded unchanged.
3740 multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3741 string OpcodeStr, string Dt,
3742 SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
3743 def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
3744 OpcodeStr, !strconcat(Dt, "8"),
3745 v8i16, v8i8, OpNode, ExtOp, Commutable>;
3746 def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
3747 OpcodeStr, !strconcat(Dt, "16"),
3748 v4i32, v4i16, OpNode, ExtOp, Commutable>;
3749 def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
3750 OpcodeStr, !strconcat(Dt, "32"),
3751 v2i64, v2i32, OpNode, ExtOp, Commutable>;
3755 // Neon Multiply-Op vector operations,
3756 // element sizes of 8, 16 and 32 bits:
// N3VMulOp_QHS emits N3VDMulOp defs for v8i8/v4i16/v2i32 and N3VQMulOp defs
// for v16i8/v8i16/v4i32. Every def passes the fixed node `mul` followed by the
// caller's OpNode. The 8-bit defs reuse the 16-bit itineraries (itinD16 /
// itinQ16); only the 32-bit defs use itinD32 / itinQ32.
3757 multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3758 InstrItinClass itinD16, InstrItinClass itinD32,
3759 InstrItinClass itinQ16, InstrItinClass itinQ32,
3760 string OpcodeStr, string Dt, SDNode OpNode> {
3761 // 64-bit vector types.
3762 def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
3763 OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
3764 def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
3765 OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
3766 def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
3767 OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;
3769 // 128-bit vector types.
3770 def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
3771 OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
3772 def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
3773 OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
3774 def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
3775 OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
// N3VMulOpSL_HS: 16- and 32-bit "SL" forms of the multiply-op instructions.
//   v4i16 : N3VDMulOpSL16, 0b01, itinD16, Dt + "16", single type v4i16
//   v2i32 : N3VDMulOpSL,   0b10, itinD32, Dt + "32", single type v2i32
//   v8i16 : N3VQMulOpSL16, 0b01, itinQ16, Dt + "16", type pair v8i16, v4i16
//   v4i32 : N3VQMulOpSL,   0b10, itinQ32, Dt + "32", type pair v4i32, v2i32
// The 64-bit defs pass `mul` and ShOp after the type.
3778 multiclass N3VMulOpSL_HS<bits<4> op11_8,
3779 InstrItinClass itinD16, InstrItinClass itinD32,
3780 InstrItinClass itinQ16, InstrItinClass itinQ32,
3781 string OpcodeStr, string Dt, SDPatternOperator ShOp> {
3782 def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
3783 OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
3784 def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
3785 OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
3786 def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
3787 OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
3789 def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
3790 OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
3794 // Neon Intrinsic-Op vector operations,
3795 // element sizes of 8, 16 and 32 bits:
// N3VIntOp_QHS emits N3VDIntOp defs (v8i8/v4i16/v2i32, all on itinD) and
// N3VQIntOp defs (v16i8/v8i16/v4i32, all on itinQ). The third template
// argument steps 0b00/0b01/0b10 with the "8"/"16"/"32" Dt suffix, and every
// def passes IntOp followed by OpNode.
3796 multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3797 InstrItinClass itinD, InstrItinClass itinQ,
3798 string OpcodeStr, string Dt, SDPatternOperator IntOp,
3800 // 64-bit vector types.
3801 def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
3802 OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
3803 def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
3804 OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
3805 def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
3806 OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;
3808 // 128-bit vector types.
3809 def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
3810 OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
3811 def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
3812 OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
3813 def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
3814 OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
3817 // Neon 3-argument intrinsics,
3818 // element sizes of 16 and 32 bits:
// N3VInt3_HS emits N3VDInt3 defs (v4i16 on itinD16, v2i32 on itinD32) and
// N3VQInt3 defs (v8i16 on itinQ16, v4i32 on itinQ32). Each def passes the
// same vector type twice, followed by IntOp.
3819 multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
3820 InstrItinClass itinD16, InstrItinClass itinD32,
3821 InstrItinClass itinQ16, InstrItinClass itinQ32,
3822 string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3823 // 64-bit vector types.
3824 def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
3825 OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
3826 def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
3827 OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
3829 // 128-bit vector types.
3830 def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
3831 OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
3832 def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
3833 OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
3836 // element sizes of 8, 16 and 32 bits:
// N3VInt3_QHS inherits every def of N3VInt3_HS (all parameters are passed
// straight through) and adds the 8-bit variants v8i8 (N3VDInt3) and v16i8
// (N3VQInt3). Both 8-bit defs reuse the 16-bit itineraries.
3837 multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3838 InstrItinClass itinD16, InstrItinClass itinD32,
3839 InstrItinClass itinQ16, InstrItinClass itinQ32,
3840 string OpcodeStr, string Dt, SDPatternOperator IntOp>
3841 :N3VInt3_HS <op24, op23, op11_8, op4, itinD16, itinD32,
3842 itinQ16, itinQ32, OpcodeStr, Dt, IntOp>{
3843 // 64-bit vector types.
3844 def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
3845 OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
3846 // 128-bit vector types.
3847 def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
3848 OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
3851 // Neon Long Multiply-Op vector operations,
3852 // element sizes of 8, 16 and 32 bits:
// N3VLMulOp_QHS emits three N3VLMulOp defs, each passing MulOp then OpNode:
//   v8i16 : third argument 0b00, Dt + "8",  type pair v8i16, v8i8,  itin16
//   v4i32 : third argument 0b01, Dt + "16", type pair v4i32, v4i16, itin16
//   v2i64 : third argument 0b10, Dt + "32", type pair v2i64, v2i32, itin32
3853 multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3854 InstrItinClass itin16, InstrItinClass itin32,
3855 string OpcodeStr, string Dt, SDNode MulOp,
3857 def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
3858 !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
3859 def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
3860 !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
3861 def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
3862 !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
// N3VLMulOpSL_HS: "SL" forms of the long multiply-op instructions. The
// itineraries are hard-coded rather than parameters: IIC_VMACi16D for v4i16
// (N3VLMulOpSL16) and IIC_VMACi32D for v2i32 (N3VLMulOpSL).
3865 multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
3866 string Dt, SDNode MulOp, SDNode OpNode> {
3867 def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
3868 !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
3869 def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
3870 !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
3874 // Neon Long 3-argument intrinsics.
3876 // First with only element sizes of 16 and 32 bits:
// N3VLInt3_HS emits two N3VLInt3 defs:
//   v4i32 : third argument 0b01, Dt + "16", type pair v4i32, v4i16, itin16
//   v2i64 : third argument 0b10, Dt + "32", type pair v2i64, v2i32, itin32
3877 multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
3878 InstrItinClass itin16, InstrItinClass itin32,
3879 string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3880 def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
3881 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
3882 def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
3883 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
// N3VLInt3SL_HS: "SL" forms of the long 3-argument intrinsics. As in
// N3VLMulOpSL_HS the itineraries are fixed: IIC_VMACi16D for v4i16
// (N3VLInt3SL16) and IIC_VMACi32D for v2i32 (N3VLInt3SL).
3886 multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
3887 string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3888 def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
3889 OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
3890 def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
3891 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
3894 // ....then also with element size of 8 bits:
// N3VLInt3_QHS inherits every def of N3VLInt3_HS and adds the 8-bit variant
// v8i16 (third argument 0b00, Dt + "8", type pair v8i16, v8i8) on itin16.
3895 multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3896 InstrItinClass itin16, InstrItinClass itin32,
3897 string OpcodeStr, string Dt, SDPatternOperator IntOp>
3898 : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
3899 def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
3900 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
3903 // ....with explicit extend (VABAL).
// N3VLIntExtOp_QHS emits three N3VLIntExtOp defs sharing one itinerary:
//   v8i16 : third argument 0b00, Dt + "8",  type pair v8i16, v8i8
//   v4i32 : third argument 0b01, Dt + "16", type pair v4i32, v4i16
//   v2i64 : third argument 0b10, Dt + "32", type pair v2i64, v2i32
// Each def passes IntOp, ExtOp and OpNode in that order.
3904 multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3905 InstrItinClass itin, string OpcodeStr, string Dt,
3906 SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> {
3907 def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
3908 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
3909 IntOp, ExtOp, OpNode>;
3910 def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
3911 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
3912 IntOp, ExtOp, OpNode>;
3913 def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
3914 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
3915 IntOp, ExtOp, OpNode>;
3919 // Neon Pairwise long 2-register intrinsics,
3920 // element sizes of 8, 16 and 32 bits:
// N2VPLInt_QHS emits N2VDPLInt defs (v8i8/v4i16/v2i32) and N2VQPLInt defs
// (v16i8/v8i16/v4i32). Defs are named after the second (narrower-element)
// type of each pair; the first type has half as many elements, each twice as
// wide (e.g. v4i16, v8i8). The third template argument steps 0b00/0b01/0b10
// with the "8"/"16"/"32" Dt suffix. No itinerary is passed.
3921 multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3922 bits<5> op11_7, bit op4,
3923 string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3924 // 64-bit vector types.
3925 def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3926 OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
3927 def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3928 OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
3929 def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3930 OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
3932 // 128-bit vector types.
3933 def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3934 OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
3935 def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3936 OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
3937 def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3938 OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
3942 // Neon Pairwise long 2-register accumulate intrinsics,
3943 // element sizes of 8, 16 and 32 bits:
// N2VPLInt2_QHS mirrors N2VPLInt_QHS argument-for-argument but instantiates
// N2VDPLInt2 / N2VQPLInt2 instead. Defs are named after the second
// (narrower-element) type of each pair.
3944 multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3945 bits<5> op11_7, bit op4,
3946 string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3947 // 64-bit vector types.
3948 def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3949 OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
3950 def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3951 OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
3952 def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3953 OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
3955 // 128-bit vector types.
3956 def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3957 OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
3958 def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3959 OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
3960 def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3961 OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
3965 // Neon 2-register vector shift by immediate,
3966 // with f of either N2RegVShLFrm or N2RegVShRFrm
3967 // element sizes of 8, 16, 32 and 64 bits:
// N2VShL_QHSD: every def uses format N2RegVShLFrm and the i32imm operand.
// The 8/16/32-bit defs pass 0 as the fourth template argument and pin the
// leading bits of Inst{21-16} (001, 01, 1 respectively; see the per-def
// comments). The 64-bit defs pass 1 instead and override no Inst bits.
3968 multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3969 InstrItinClass itin, string OpcodeStr, string Dt,
3971 // 64-bit vector types.
3972 def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3973 OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
3974 let Inst{21-19} = 0b001; // imm6 = 001xxx
3976 def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3977 OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
3978 let Inst{21-20} = 0b01; // imm6 = 01xxxx
3980 def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3981 OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
3982 let Inst{21} = 0b1; // imm6 = 1xxxxx
3984 def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
3985 OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
3988 // 128-bit vector types.
3989 def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3990 OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
3991 let Inst{21-19} = 0b001; // imm6 = 001xxx
3993 def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3994 OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
3995 let Inst{21-20} = 0b01; // imm6 = 01xxxx
3997 def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3998 OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
3999 let Inst{21} = 0b1; // imm6 = 1xxxxx
4001 def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
4002 OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
// N2VShR_QHSD: right-shift counterpart of N2VShL_QHSD. Every def uses format
// N2RegVShRFrm, and the immediate operand class follows the element width
// (shr_imm8 / shr_imm16 / shr_imm32 / shr_imm64). The 8/16/32-bit defs pass 0
// as the fourth template argument and pin the leading bits of Inst{21-16};
// the 64-bit defs pass 1 and override no Inst bits.
// baseOpc is declared but not referenced by any def visible in this body.
4005 multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4006 InstrItinClass itin, string OpcodeStr, string Dt,
4007 string baseOpc, SDNode OpNode> {
4008 // 64-bit vector types.
4009 def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
4010 OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
4011 let Inst{21-19} = 0b001; // imm6 = 001xxx
4013 def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
4014 OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
4015 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4017 def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
4018 OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
4019 let Inst{21} = 0b1; // imm6 = 1xxxxx
4021 def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
4022 OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
4025 // 128-bit vector types.
4026 def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
4027 OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
4028 let Inst{21-19} = 0b001; // imm6 = 001xxx
4030 def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
4031 OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
4032 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4034 def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
4035 OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
4036 let Inst{21} = 0b1; // imm6 = 1xxxxx
4038 def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
4039 OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
4043 // Neon Shift-Accumulate vector operations,
4044 // element sizes of 8, 16, 32 and 64 bits:
// N2VShAdd_QHSD emits N2VDShAdd defs (v8i8/v4i16/v2i32/v1i64) and N2VQShAdd
// defs (v16i8/v8i16/v4i32/v2i64). The immediate operand class follows the
// element width (shr_imm8 .. shr_imm64). The 8/16/32-bit defs pass 0 as the
// fourth template argument and pin the leading bits of Inst{21-16}; the
// 64-bit defs pass 1 and override no Inst bits. No itinerary or instruction
// format is passed from here.
4045 multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4046 string OpcodeStr, string Dt, SDNode ShOp> {
4047 // 64-bit vector types.
4048 def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
4049 OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
4050 let Inst{21-19} = 0b001; // imm6 = 001xxx
4052 def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
4053 OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
4054 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4056 def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
4057 OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
4058 let Inst{21} = 0b1; // imm6 = 1xxxxx
4060 def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
4061 OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
4064 // 128-bit vector types.
4065 def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
4066 OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
4067 let Inst{21-19} = 0b001; // imm6 = 001xxx
4069 def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
4070 OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
4071 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4073 def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
4074 OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
4075 let Inst{21} = 0b1; // imm6 = 1xxxxx
4077 def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
4078 OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
4082 // Neon Shift-Insert vector operations,
4083 // with f of either N2RegVShLFrm or N2RegVShRFrm
4084 // element sizes of 8, 16, 32 and 64 bits:
// N2VShInsL_QHSD: left-shift-insert variants. Every def uses the i32imm
// operand, format N2RegVShLFrm and the NEONvsli node. The data-type string is
// the bare width ("8", "16", "32", "64") with no Dt prefix concatenated.
// The 8/16/32-bit defs pass 0 as the fourth template argument and pin the
// leading bits of Inst{21-16}; the 64-bit defs pass 1 and override no bits.
4085 multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4087 // 64-bit vector types.
4088 def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
4089 N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
4090 let Inst{21-19} = 0b001; // imm6 = 001xxx
4092 def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
4093 N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
4094 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4096 def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
4097 N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
4098 let Inst{21} = 0b1; // imm6 = 1xxxxx
4100 def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
4101 N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
4104 // 128-bit vector types.
4105 def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
4106 N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
4107 let Inst{21-19} = 0b001; // imm6 = 001xxx
4109 def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
4110 N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
4111 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4113 def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
4114 N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
4115 let Inst{21} = 0b1; // imm6 = 1xxxxx
4117 def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
4118 N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
// N2VShInsR_QHSD: right-shift-insert variants. Every def uses format
// N2RegVShRFrm and the NEONvsri node; the immediate operand class follows the
// element width (shr_imm8 .. shr_imm64). The data-type string is the bare
// width with no Dt prefix. The 8/16/32-bit defs pass 0 as the fourth template
// argument and pin the leading bits of Inst{21-16}; the 64-bit defs pass 1
// and override no bits.
4121 multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4123 // 64-bit vector types.
4124 def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
4125 N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
4126 let Inst{21-19} = 0b001; // imm6 = 001xxx
4128 def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
4129 N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
4130 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4132 def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
4133 N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
4134 let Inst{21} = 0b1; // imm6 = 1xxxxx
4136 def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
4137 N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
4140 // 128-bit vector types.
4141 def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
4142 N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
4143 let Inst{21-19} = 0b001; // imm6 = 001xxx
4145 def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
4146 N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
4147 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4149 def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
4150 N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
4151 let Inst{21} = 0b1; // imm6 = 1xxxxx
4153 def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
4154 N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
4158 // Neon Shift Long operations,
4159 // element sizes of 8, 16, 32 bits:
// N2VLSh_QHS emits three N2VLSh defs. The immediate operand class is
// imm1_7 / imm1_15 / imm1_31 for source widths 8 / 16 / 32, and each def pins
// the leading bits of Inst{21-16} as noted on its `let` line.
//   v8i16 : Dt + "8",  type pair v8i16, v8i8
//   v4i32 : Dt + "16", type pair v4i32, v4i16
//   v2i64 : Dt + "32", type pair v2i64, v2i32
4160 multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
4161 bit op4, string OpcodeStr, string Dt,
4162 SDPatternOperator OpNode> {
4163 def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
4164 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
4165 let Inst{21-19} = 0b001; // imm6 = 001xxx
4167 def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
4168 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
4169 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4171 def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
4172 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
4173 let Inst{21} = 0b1; // imm6 = 1xxxxx
4177 // Neon Shift Narrow operations,
4178 // element sizes of 16, 32, 64 bits:
// N2VNSh_HSD emits three N2VNSh defs. Defs are named after the first type of
// each pair (v8i8 / v4i16 / v2i32), while the Dt suffix is the element width
// of the second type ("16" / "32" / "64"). The immediate operand class
// matches the first type's width (shr_imm8 / shr_imm16 / shr_imm32).
4179 multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
4180 bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
4181 SDPatternOperator OpNode> {
4182 def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
4183 OpcodeStr, !strconcat(Dt, "16"),
4184 v8i8, v8i16, shr_imm8, OpNode> {
4185 let Inst{21-19} = 0b001; // imm6 = 001xxx
4187 def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
4188 OpcodeStr, !strconcat(Dt, "32"),
4189 v4i16, v4i32, shr_imm16, OpNode> {
4190 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4192 def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
4193 OpcodeStr, !strconcat(Dt, "64"),
4194 v2i32, v2i64, shr_imm32, OpNode> {
4195 let Inst{21} = 0b1; // imm6 = 1xxxxx
4199 //===----------------------------------------------------------------------===//
4200 // Instruction Definitions.
4201 //===----------------------------------------------------------------------===//
4203 // Vector Add Operations.
4205 // VADD : Vector Add (integer and floating-point)
4206 defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
// Floating-point adds select on fadd with the final argument set to 1.
4208 def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
4209 v2f32, v2f32, fadd, 1>;
4210 def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
4211 v4f32, v4f32, fadd, 1>;
// The f16 forms differ from the f32 forms only in the third argument (0b01),
// the vector types, and the extra HasFullFP16 predicate.
4212 def VADDhd : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
4213 v4f16, v4f16, fadd, 1>,
4214 Requires<[HasNEON,HasFullFP16]>;
4215 def VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
4216 v8f16, v8f16, fadd, 1>,
4217 Requires<[HasNEON,HasFullFP16]>;
4218 // VADDL : Vector Add Long (Q = D + D)
// Signed and unsigned forms differ in the first argument (0 vs 1), the "s"/"u"
// data-type prefix and the extend node (sext vs zext).
4219 defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
4220 "vaddl", "s", add, sext, 1>;
4221 defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
4222 "vaddl", "u", add, zext, 1>;
4223 // VADDW : Vector Add Wide (Q = Q + D)
// Unlike VADDL, the trailing Commutable argument is 0 here.
4224 defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
4225 defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
4226 // VHADD : Vector Halving Add
4227 defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
4228 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4229 "vhadd", "s", int_arm_neon_vhadds, 1>;
4230 defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
4231 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4232 "vhadd", "u", int_arm_neon_vhaddu, 1>;
4233 // VRHADD : Vector Rounding Halving Add
4234 defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
4235 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4236 "vrhadd", "s", int_arm_neon_vrhadds, 1>;
4237 defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
4238 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4239 "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
4240 // VQADD : Vector Saturating Add
4241 defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
4242 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4243 "vqadd", "s", int_arm_neon_vqadds, 1>;
4244 defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
4245 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4246 "vqadd", "u", int_arm_neon_vqaddu, 1>;
4247 // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
// VADDHN is instantiated with null_frag in place of an intrinsic; the
// explicit Pat records further down select it.
4248 defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
4249 // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
4250 defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
4251 int_arm_neon_vraddhn, 1>;
// Select VADDHN for trunc(NEONvshru(add(Vn, Vm), N)), where N is half the
// source element width: 8 for v8i16, 16 for v4i32, 32 for v2i64.
4253 def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
4254 (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
4255 def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
4256 (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
4257 def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
4258 (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
4260 // Vector Multiply Operations.
4262 // VMUL : Vector Multiply (integer, polynomial and floating-point)
4263 defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
4264 IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
// Polynomial multiply ("p8") is selected through int_arm_neon_vmulp.
4265 def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
4266 "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
4267 def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
4268 "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
4269 def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
4270 v2f32, v2f32, fmul, 1>;
4271 def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
4272 v4f32, v4f32, fmul, 1>;
// The f16 forms additionally require HasFullFP16.
4273 def VMULhd : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
4274 v4f16, v4f16, fmul, 1>,
4275 Requires<[HasNEON,HasFullFP16]>;
4276 def VMULhq : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
4277 v8f16, v8f16, fmul, 1>,
4278 Requires<[HasNEON,HasFullFP16]>;
// "sl" variants of VMUL, integer first and then f32 / f16.
4279 defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
4280 def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
4281 def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
4283 def VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
4284 Requires<[HasNEON,HasFullFP16]>;
4285 def VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
4287 Requires<[HasNEON,HasFullFP16]>;
// Q-register multiply by NEONvduplane of a Q register: rewrite to the "sl"
// instruction, taking a 64-bit subregister of $src2 via EXTRACT_SUBREG with
// DSubReg_*_reg and passing the lane through SubReg_*_lane.
// NOTE(review): presumably DSubReg_*_reg picks the D half that holds the lane
// and SubReg_*_lane rebases the index into that half; confirm against their
// definitions.
4289 def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
4290 (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
4291 (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
4292 (v4i16 (EXTRACT_SUBREG QPR:$src2,
4293 (DSubReg_i16_reg imm:$lane))),
4294 (SubReg_i16_lane imm:$lane)))>;
4295 def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
4296 (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
4297 (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
4298 (v2i32 (EXTRACT_SUBREG QPR:$src2,
4299 (DSubReg_i32_reg imm:$lane))),
4300 (SubReg_i32_lane imm:$lane)))>;
4301 def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
4302 (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
4303 (v4f32 (VMULslfq (v4f32 QPR:$src1),
4304 (v2f32 (EXTRACT_SUBREG QPR:$src2,
4305 (DSubReg_i32_reg imm:$lane))),
4306 (SubReg_i32_lane imm:$lane)))>;
4307 def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
4308 (v8f16 (NEONvduplane (v8f16 QPR:$src2), imm:$lane)))),
4309 (v8f16 (VMULslhq(v8f16 QPR:$src1),
4310 (v4f16 (EXTRACT_SUBREG QPR:$src2,
4311 (DSubReg_i16_reg imm:$lane))),
4312 (SubReg_i16_lane imm:$lane)))>;
// fmul by NEONvdup of a scalar register (SPR for f32, HPR for f16): the
// scalar is placed in ssub_0 of an IMPLICIT_DEF 64-bit vector via
// INSERT_SUBREG. The Q-register patterns also build a 64-bit (v2f32 / v4f16)
// vector for the scalar.
4314 def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
4316 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
4318 def : Pat<(v4f16 (fmul DPR:$Rn, (NEONvdup (f16 HPR:$Rm)))),
4320 (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
4322 def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
4324 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
4326 def : Pat<(v8f16 (fmul QPR:$Rn, (NEONvdup (f16 HPR:$Rm)))),
4328 (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
4331 // VQDMULH : Vector Saturating Doubling Multiply Returning High Half
// Only 16- and 32-bit element sizes are instantiated (the _HS multiclasses).
4332 defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
4333 IIC_VMULi16Q, IIC_VMULi32Q,
4334 "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
4335 defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
4336 IIC_VMULi16Q, IIC_VMULi32Q,
4337 "vqdmulh", "s", int_arm_neon_vqdmulh>;
// Q-register int_arm_neon_vqdmulh whose second operand is a NEONvduplane of a
// Q register: select the "sl" instruction on a 64-bit subregister of $src2
// (EXTRACT_SUBREG with DSubReg_*_reg), with the lane passed through
// SubReg_*_lane.
4338 def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
4339 (v8i16 (NEONvduplane (v8i16 QPR:$src2),
4341 (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
4342 (v4i16 (EXTRACT_SUBREG QPR:$src2,
4343 (DSubReg_i16_reg imm:$lane))),
4344 (SubReg_i16_lane imm:$lane)))>;
4345 def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
4346 (v4i32 (NEONvduplane (v4i32 QPR:$src2),
4348 (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
4349 (v2i32 (EXTRACT_SUBREG QPR:$src2,
4350 (DSubReg_i32_reg imm:$lane))),
4351 (SubReg_i32_lane imm:$lane)))>;
4353 // VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
// Same shape as the VQDMULH definitions: the first template argument is 1
// rather than 0, the "sl" opcode field is 0b1101 rather than 0b1100, and the
// intrinsic is int_arm_neon_vqrdmulh.
4354 defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
4355 IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
4356 "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
4357 defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
4358 IIC_VMULi16Q, IIC_VMULi32Q,
4359 "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
// Q-register int_arm_neon_vqrdmulh with a NEONvduplane second operand:
// select the "sl" instruction on a 64-bit subregister of $src2.
4360 def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
4361 (v8i16 (NEONvduplane (v8i16 QPR:$src2),
4363 (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
4364 (v4i16 (EXTRACT_SUBREG QPR:$src2,
4365 (DSubReg_i16_reg imm:$lane))),
4366 (SubReg_i16_lane imm:$lane)))>;
4367 def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
4368 (v4i32 (NEONvduplane (v4i32 QPR:$src2),
4370 (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
4371 (v2i32 (EXTRACT_SUBREG QPR:$src2,
4372 (DSubReg_i32_reg imm:$lane))),
4373 (SubReg_i32_lane imm:$lane)))>;
4375 // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
// The `let` below sets PostEncoderMethod and DecoderNamespace for the records
// defined inside its braces.
4376 let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
4377 DecoderNamespace = "NEONData" in {
4378 defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
4379 "vmull", "s", NEONvmulls, 1>;
4380 defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
4381 "vmull", "u", NEONvmullu, 1>;
// Polynomial forms: p8 uses N3VLInt; p64 uses N3VLIntnp with NoItinerary and
// is gated on HasV8 and HasCrypto. Both select on int_arm_neon_vmullp.
4382 def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
4383 v8i16, v8i8, int_arm_neon_vmullp, 1>;
4384 def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
4385 "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
4386 Requires<[HasV8, HasCrypto]>;
// "sl" forms of VMULL; both pass the single itinerary IIC_VMULi16D.
4388 defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
4389 defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
4391 // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
// Instantiated for 16- and 32-bit sources only (N3VLInt_HS / N3VLIntSL_HS),
// both selecting on int_arm_neon_vqdmull. The "sl" form passes the single
// itinerary IIC_VMULi16D.
4392 defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
4393 "vqdmull", "s", int_arm_neon_vqdmull, 1>;
4394 defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
4395 "vqdmull", "s", int_arm_neon_vqdmull>;
4397 // Vector Multiply-Accumulate and Multiply-Subtract Operations.
4399 // VMLA : Vector Multiply Accumulate (integer and floating-point)
4400 defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4401 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
// Floating-point VMLA selects on fmul_su / fadd_mlx and is gated on
// UseFPVMLx; the f16 forms additionally require HasFullFP16.
4402 def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
4403 v2f32, fmul_su, fadd_mlx>,
4404 Requires<[HasNEON, UseFPVMLx]>;
4405 def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
4406 v4f32, fmul_su, fadd_mlx>,
4407 Requires<[HasNEON, UseFPVMLx]>;
4408 def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
4409 v4f16, fmul_su, fadd_mlx>,
4410 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4411 def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
4412 v8f16, fmul_su, fadd_mlx>,
4413 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
// "sl" variants of VMLA.
4414 defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
4415 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
4416 def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
4417 v2f32, fmul_su, fadd_mlx>,
4418 Requires<[HasNEON, UseFPVMLx]>;
4419 def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
4420 v4f32, v2f32, fmul_su, fadd_mlx>,
4421 Requires<[HasNEON, UseFPVMLx]>;
4422 def VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
4424 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
// Note: VMLAslhq passes plain fmul / fadd, whereas the f32 "sl" forms above
// pass fmul_su / fadd_mlx.
4425 def VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
4426 v8f16, v4f16, fmul, fadd>,
4427 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
// Q-register add(src1, mul(src2, NEONvduplane(src3, lane))): select the "sl"
// instruction, taking a 64-bit subregister of $src3 via EXTRACT_SUBREG with
// DSubReg_*_reg and passing the lane through SubReg_*_lane.
4429 def : Pat<(v8i16 (add (v8i16 QPR:$src1),
4430 (mul (v8i16 QPR:$src2),
4431 (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
4432 (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
4433 (v4i16 (EXTRACT_SUBREG QPR:$src3,
4434 (DSubReg_i16_reg imm:$lane))),
4435 (SubReg_i16_lane imm:$lane)))>;
4437 def : Pat<(v4i32 (add (v4i32 QPR:$src1),
4438 (mul (v4i32 QPR:$src2),
4439 (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
4440 (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
4441 (v2i32 (EXTRACT_SUBREG QPR:$src3,
4442 (DSubReg_i32_reg imm:$lane))),
4443 (SubReg_i32_lane imm:$lane)))>;
// The f32 form matches fadd_mlx / fmul_su and carries the same
// HasNEON + UseFPVMLx predicate as VMLAslfq.
4445 def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
4446 (fmul_su (v4f32 QPR:$src2),
4447 (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
4448 (v4f32 (VMLAslfq (v4f32 QPR:$src1),
4450 (v2f32 (EXTRACT_SUBREG QPR:$src3,
4451 (DSubReg_i32_reg imm:$lane))),
4452 (SubReg_i32_lane imm:$lane)))>,
4453 Requires<[HasNEON, UseFPVMLx]>;
4455 // VMLAL : Vector Multiply Accumulate Long (Q += D * D)
// The signed ("s") and unsigned ("u") variants differ in their first template
// bit and in the multiply node they pass (NEONvmulls vs. NEONvmullu); both
// accumulate with add.
4456 defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
4457 "vmlal", "s", NEONvmulls, add>;
4458 defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
4459 "vmlal", "u", NEONvmullu, add>;
// "sl" variants of the same signed/unsigned pair (presumably the by-lane
// forms, as with the other *sl records in this file -- confirm against
// N3VLMulOpSL_HS).
4461 defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
4462 defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
4464 let Predicates = [HasNEON, HasV8_1a] in {
4465 // v8.1a Neon Rounding Double Multiply-Op vector operations.
4466 // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate Returning High Half
4468 defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
4469 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
4471 def : Pat<(v4i16 (int_arm_neon_vqadds
4473 (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
4474 (v4i16 DPR:$Vm))))),
4475 (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4476 def : Pat<(v2i32 (int_arm_neon_vqadds
4478 (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
4479 (v2i32 DPR:$Vm))))),
4480 (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4481 def : Pat<(v8i16 (int_arm_neon_vqadds
4483 (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
4484 (v8i16 QPR:$Vm))))),
4485 (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4486 def : Pat<(v4i32 (int_arm_neon_vqadds
4488 (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
4489 (v4i32 QPR:$Vm))))),
4490 (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4492 defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
4493 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
4495 def : Pat<(v4i16 (int_arm_neon_vqadds
4497 (v4i16 (int_arm_neon_vqrdmulh
4499 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
4501 (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
4503 def : Pat<(v2i32 (int_arm_neon_vqadds
4505 (v2i32 (int_arm_neon_vqrdmulh
4507 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
4509 (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4511 def : Pat<(v8i16 (int_arm_neon_vqadds
4513 (v8i16 (int_arm_neon_vqrdmulh
4515 (v8i16 (NEONvduplane (v8i16 QPR:$src3),
4517 (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
4519 (v4i16 (EXTRACT_SUBREG
4521 (DSubReg_i16_reg imm:$lane))),
4522 (SubReg_i16_lane imm:$lane)))>;
4523 def : Pat<(v4i32 (int_arm_neon_vqadds
4525 (v4i32 (int_arm_neon_vqrdmulh
4527 (v4i32 (NEONvduplane (v4i32 QPR:$src3),
4529 (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
4531 (v2i32 (EXTRACT_SUBREG
4533 (DSubReg_i32_reg imm:$lane))),
4534 (SubReg_i32_lane imm:$lane)))>;
4536 // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract Returning High Half
4538 defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
4539 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
4541 def : Pat<(v4i16 (int_arm_neon_vqsubs
4543 (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
4544 (v4i16 DPR:$Vm))))),
4545 (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4546 def : Pat<(v2i32 (int_arm_neon_vqsubs
4548 (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
4549 (v2i32 DPR:$Vm))))),
4550 (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4551 def : Pat<(v8i16 (int_arm_neon_vqsubs
4553 (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
4554 (v8i16 QPR:$Vm))))),
4555 (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4556 def : Pat<(v4i32 (int_arm_neon_vqsubs
4558 (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
4559 (v4i32 QPR:$Vm))))),
4560 (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4562 defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
4563 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
4565 def : Pat<(v4i16 (int_arm_neon_vqsubs
4567 (v4i16 (int_arm_neon_vqrdmulh
4569 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
4571 (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
4572 def : Pat<(v2i32 (int_arm_neon_vqsubs
4574 (v2i32 (int_arm_neon_vqrdmulh
4576 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
4578 (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4580 def : Pat<(v8i16 (int_arm_neon_vqsubs
4582 (v8i16 (int_arm_neon_vqrdmulh
4584 (v8i16 (NEONvduplane (v8i16 QPR:$src3),
4586 (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
4588 (v4i16 (EXTRACT_SUBREG
4590 (DSubReg_i16_reg imm:$lane))),
4591 (SubReg_i16_lane imm:$lane)))>;
4592 def : Pat<(v4i32 (int_arm_neon_vqsubs
4594 (v4i32 (int_arm_neon_vqrdmulh
4596 (v4i32 (NEONvduplane (v4i32 QPR:$src3),
4598 (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
4600 (v2i32 (EXTRACT_SUBREG
4602 (DSubReg_i32_reg imm:$lane))),
4603 (SubReg_i32_lane imm:$lane)))>;
4605 // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
// Both defms pass null_frag as their operator, so the multiclasses contribute
// no selection pattern here; matching is done by the explicit Pats that follow.
4606 defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4607 "vqdmlal", "s", null_frag>;
4608 defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;
// Select vqadds(acc, vqdmull(Vn, Vm)) as a single VQDMLAL, for the
// v4i16 -> v4i32 and v2i32 -> v2i64 element sizes.
4610 def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
4611 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4612 (v4i16 DPR:$Vm))))),
4613 (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4614 def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
4615 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4616 (v2i32 DPR:$Vm))))),
4617 (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4618 def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
4619 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4620 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
4622 (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
4623 def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
4624 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4625 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
4627 (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
4629 // VMLS : Vector Multiply Subtract (integer and floating-point)
// Integer forms ("i" data type), using sub as the accumulate operator.
4630 defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4631 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
// f32 forms: the fmul_su and fsub_mlx fragments are passed to the class, and
// the records are only available under HasNEON together with UseFPVMLx.
4632 def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
4633 v2f32, fmul_su, fsub_mlx>,
4634 Requires<[HasNEON, UseFPVMLx]>;
4635 def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
4636 v4f32, fmul_su, fsub_mlx>,
4637 Requires<[HasNEON, UseFPVMLx]>;
4638 def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
4640 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4641 def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
4643 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4644 defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
4645 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
4646 def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
4647 v2f32, fmul_su, fsub_mlx>,
4648 Requires<[HasNEON, UseFPVMLx]>;
4649 def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
4650 v4f32, v2f32, fmul_su, fsub_mlx>,
4651 Requires<[HasNEON, UseFPVMLx]>;
4652 def VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
4654 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4655 def VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
4656 v8f16, v4f16, fmul, fsub>,
4657 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
// VMLS by a lane that is duplicated out of a Q register. Each result passes
// the instruction a 64-bit vector, so a D subregister of $src3 is extracted
// (chosen by DSubReg_*_reg) and the lane number is rewritten by SubReg_*_lane.
// NOTE(review): both transforms are defined elsewhere; presumably they split
// the Q-lane number into "which D half" and "lane within that half" -- confirm.
4659 def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
4660 (mul (v8i16 QPR:$src2),
4661 (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
4662 (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
4663 (v4i16 (EXTRACT_SUBREG QPR:$src3,
4664 (DSubReg_i16_reg imm:$lane))),
4665 (SubReg_i16_lane imm:$lane)))>;
// 32-bit integer elements.
4667 def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
4668 (mul (v4i32 QPR:$src2),
4669 (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
4670 (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
4671 (v2i32 (EXTRACT_SUBREG QPR:$src3,
4672 (DSubReg_i32_reg imm:$lane))),
4673 (SubReg_i32_lane imm:$lane)))>;
// f32 elements: matches fsub_mlx over fmul_su and reuses the i32 lane
// transforms; gated on UseFPVMLx like the VMLSslfq record it selects.
4675 def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
4676 (fmul_su (v4f32 QPR:$src2),
4677 (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
4678 (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
4679 (v2f32 (EXTRACT_SUBREG QPR:$src3,
4680 (DSubReg_i32_reg imm:$lane))),
4681 (SubReg_i32_lane imm:$lane)))>,
4682 Requires<[HasNEON, UseFPVMLx]>;
4684 // VMLSL : Vector Multiply Subtract Long (Q -= D * D)
// Signed and unsigned variants pass NEONvmulls / NEONvmullu respectively as
// the multiply node and sub as the accumulate operator.
4685 defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
4686 "vmlsl", "s", NEONvmulls, sub>;
4687 defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
4688 "vmlsl", "u", NEONvmullu, sub>;
// "sl" variants of the same pair (presumably by-lane forms -- confirm against
// N3VLMulOpSL_HS).
4690 defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
4691 defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
4693 // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
// Both defms pass null_frag, so the multiclasses contribute no selection
// pattern here; the explicit Pats that follow do the matching.
4694 defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
4695 "vqdmlsl", "s", null_frag>;
4696 defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;
// Select vqsubs(acc, vqdmull(Vn, Vm)) as a single VQDMLSL, for the
// v4i16 -> v4i32 and v2i32 -> v2i64 element sizes.
4698 def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
4699 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4700 (v4i16 DPR:$Vm))))),
4701 (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4702 def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
4703 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4704 (v2i32 DPR:$Vm))))),
4705 (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4706 def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
4707 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4708 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
4710 (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
4711 def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
4712 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4713 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
4715 (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
4717 // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
// f32 VFMA, D- and Q-register forms. Both pass the fmul_su / fadd_mlx
// fragments to their class and are predicated on HasVFP4 and UseFusedMAC in
// addition to HasNEON.
4718 def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
4719 v2f32, fmul_su, fadd_mlx>,
4720 Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4722 def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
4723 v4f32, fmul_su, fadd_mlx>,
4724 Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4725 def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
4727 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4729 def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
4731 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4733 // Fused Vector Multiply Subtract (floating-point)
// f32 VFMS, D- and Q-register forms. Both pass the fmul_su / fsub_mlx
// fragments to their class and are predicated on HasVFP4 and UseFusedMAC in
// addition to HasNEON.
4734 def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
4735 v2f32, fmul_su, fsub_mlx>,
4736 Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4737 def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
4738 v4f32, fmul_su, fsub_mlx>,
4739 Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4740 def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
4742 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4743 def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
4745 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4747 // Match @llvm.fma.* intrinsics
// The fma node carries the addend as its last operand, whereas the VFMA/VFMS
// instructions take the accumulator ($src1) first, hence the reordering in
// each result. Every pattern lists HasNEON explicitly: Requires<> on a Pat
// supplies that pattern's whole predicate list, and the instructions selected
// here are all NEON records.
4748 def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
4749 (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4750 Requires<[HasNEON,HasFullFP16]>;
4751 def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
4752 (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4753 Requires<[HasNEON,HasFullFP16]>;
4754 def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
4755 (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4756 Requires<[HasNEON,HasVFP4]>;
4757 def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
4758 (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4759 Requires<[HasNEON,HasVFP4]>;
// An fma whose first multiplicand is negated selects the fused
// multiply-subtract form instead.
4760 def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
4761 (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4762 Requires<[HasNEON,HasVFP4]>;
4763 def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
4764 (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4765 Requires<[HasNEON,HasVFP4]>;
4767 // ARMv8.2a dot product instructions.
4768 // We put them in the VFPV8 decoder namespace because the ARM and Thumb
4769 // encodings are the same and thus no further bit twiddling is necessary
4770 // in the disassembler.
4771 class VDOT<bit op6, bit op4, RegisterClass RegTy, string Asm, string AsmTy,
4772 ValueType AccumTy, ValueType InputTy,
4773 SDPatternOperator OpNode> :
4774 N3Vnp<0b11000, 0b10, 0b1101, op6, op4, (outs RegTy:$dst),
4775 (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD,
4777 [(set (AccumTy RegTy:$dst),
4778 (OpNode (AccumTy RegTy:$Vd),
4779 (InputTy RegTy:$Vn),
4780 (InputTy RegTy:$Vm)))]> {
4781 let Predicates = [HasDotProd];
4782 let DecoderNamespace = "VFPV8";
4783 let Constraints = "$dst = $Vd";
4786 def VUDOTD : VDOT<0, 1, DPR, "vudot", "u8", v2i32, v8i8, int_arm_neon_udot>;
4787 def VSDOTD : VDOT<0, 0, DPR, "vsdot", "s8", v2i32, v8i8, int_arm_neon_sdot>;
4788 def VUDOTQ : VDOT<1, 1, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>;
4789 def VSDOTQ : VDOT<1, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>;
4791 // Indexed dot product instructions:
4792 multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty,
4793 ValueType AccumType, ValueType InputType, SDPatternOperator OpNode,
4795 def "" : N3Vnp<0b11100, 0b10, 0b1101, Q, U, (outs Ty:$dst),
4796 (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
4797 N3RegFrm, IIC_VDOTPROD, opc, dt, []> {
4800 let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane");
4801 let Constraints = "$dst = $Vd";
4802 let Predicates = [HasDotProd];
4803 let DecoderNamespace = "VFPV8";
4807 (AccumType (OpNode (AccumType Ty:$Vd),
4809 (InputType (bitconvert (AccumType
4810 (NEONvduplane (AccumType Ty:$Vm),
4811 VectorIndex32:$lane)))))),
4812 (!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>;
4815 defm VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR, v2i32, v8i8,
4816 int_arm_neon_udot, (v2i32 DPR_VFP2:$Vm)>;
4817 defm VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR, v2i32, v8i8,
4818 int_arm_neon_sdot, (v2i32 DPR_VFP2:$Vm)>;
4819 defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8,
4820 int_arm_neon_udot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
4821 defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8,
4822 int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
4825 // ARMv8.3 complex operations
4826 class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q,
4827 InstrItinClass itin, dag oops, dag iops,
4828 string opc, string dt, list<dag> pattern>
4829 : N3VCP8<{?,?}, {op21,s}, q, op4, oops,
4830 iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{
4832 let Inst{24-23} = rot;
4835 class BaseN3VCP8ComplexOdd<bit op23, bit op21, bit op4, bit s, bit q,
4836 InstrItinClass itin, dag oops, dag iops, string opc,
4837 string dt, list<dag> pattern>
4838 : N3VCP8<{?,op23}, {op21,s}, q, op4, oops,
4839 iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> {
4844 class BaseN3VCP8ComplexTiedLane32<bit op4, bit s, bit q, InstrItinClass itin,
4845 dag oops, dag iops, string opc, string dt,
4847 : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
4848 "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
4852 let Inst{21-20} = rot;
4856 class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q, InstrItinClass itin,
4857 dag oops, dag iops, string opc, string dt,
4859 : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
4860 "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
4864 let Inst{21-20} = rot;
4865 let Inst{5} = Vm{4};
4866 // This is needed because the lane operand does not have any bits in the
4867 // encoding (it only has one possible value), so we need to manually set it
4868 // to its default value.
4869 let DecoderMethod = "DecodeNEONComplexLane64Instruction";
4872 multiclass N3VCP8ComplexTied<bit op21, bit op4,
4873 string OpcodeStr, SDPatternOperator Op> {
4874 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4875 def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD, (outs DPR:$Vd),
4876 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
4877 OpcodeStr, "f16", []>;
4878 def v8f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 1, IIC_VMACQ, (outs QPR:$Vd),
4879 (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
4880 OpcodeStr, "f16", []>;
4882 let Predicates = [HasNEON,HasV8_3a] in {
4883 def v2f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 0, IIC_VMACD, (outs DPR:$Vd),
4884 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
4885 OpcodeStr, "f32", []>;
4886 def v4f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 1, IIC_VMACQ, (outs QPR:$Vd),
4887 (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
4888 OpcodeStr, "f32", []>;
4892 multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4,
4893 string OpcodeStr, SDPatternOperator Op> {
4894 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4895 def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD,
4897 (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
4898 OpcodeStr, "f16", []>;
4899 def v8f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 1, IIC_VMACQ,
4901 (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
4902 OpcodeStr, "f16", []>;
4904 let Predicates = [HasNEON,HasV8_3a] in {
4905 def v2f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 0, IIC_VMACD,
4907 (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
4908 OpcodeStr, "f32", []>;
4909 def v4f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 1, IIC_VMACQ,
4911 (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
4912 OpcodeStr, "f32", []>;
4916 // These instructions index by pairs of lanes, so the VectorIndexes are twice
4917 // as wide as the data types.
4918 multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr,
4919 SDPatternOperator Op> {
4920 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4921 def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD,
4923 (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4924 VectorIndex32:$lane, complexrotateop:$rot),
4925 OpcodeStr, "f16", []>;
4926 def v8f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 1, IIC_VMACQ,
4928 (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm,
4929 VectorIndex32:$lane, complexrotateop:$rot),
4930 OpcodeStr, "f16", []>;
4932 let Predicates = [HasNEON,HasV8_3a] in {
4933 def v2f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 0, IIC_VMACD,
4935 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
4936 complexrotateop:$rot),
4937 OpcodeStr, "f32", []>;
4938 def v4f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 1, IIC_VMACQ,
4940 (ins QPR:$src1, QPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
4941 complexrotateop:$rot),
4942 OpcodeStr, "f32", []>;
4946 defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>;
4947 defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>;
4948 defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>;
4950 // Vector Subtract Operations.
// NOTE(review): every record in this section passes a trailing 0 to its class;
// presumably that is the "commutable" flag (subtraction is not commutative) --
// confirm against the class definitions.
4952 // VSUB : Vector Subtract (integer and floating-point)
4953 defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
4954 "vsub", "i", sub, 0>;
4955 def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
4956 v2f32, v2f32, fsub, 0>;
4957 def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
4958 v4f32, v4f32, fsub, 0>;
// The f16 forms additionally require HasFullFP16.
4959 def VSUBhd : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
4960 v4f16, v4f16, fsub, 0>,
4961 Requires<[HasNEON,HasFullFP16]>;
4962 def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
4963 v8f16, v8f16, fsub, 0>,
4964 Requires<[HasNEON,HasFullFP16]>;
4965 // VSUBL : Vector Subtract Long (Q = D - D)
// Signed/unsigned variants pass sext / zext as the extension node.
4966 defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
4967 "vsubl", "s", sub, sext, 0>;
4968 defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
4969 "vsubl", "u", sub, zext, 0>;
4970 // VSUBW : Vector Subtract Wide (Q = Q - D)
4971 defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
4972 defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
4973 // VHSUB : Vector Halving Subtract
4974 defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
4975 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4976 "vhsub", "s", int_arm_neon_vhsubs, 0>;
4977 defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
4978 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4979 "vhsub", "u", int_arm_neon_vhsubu, 0>;
4980 // VQSUB : Vector Saturating Subtract
4981 defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
4982 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4983 "vqsub", "s", int_arm_neon_vqsubs, 0>;
4984 defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
4985 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4986 "vqsub", "u", int_arm_neon_vqsubu, 0>;
4987 // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
// Defined with null_frag; selection comes from the trunc/shift patterns at the
// end of this section.
4988 defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
4989 // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
4990 defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
4991 int_arm_neon_vrsubhn, 0>;
// Recognise "subtract, shift right by half the element width, truncate" as
// VSUBHN. The shift amounts (8, 16, 32) are half of the 16/32/64-bit source
// element widths.
4993 def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
4994 (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
4995 def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
4996 (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
4997 def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
4998 (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
5000 // Vector Comparisons.
5002 // VCEQ : Vector Compare Equal
5003 defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5004 IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
5005 def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
5007 def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
5009 def VCEQhd : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
5011 Requires<[HasNEON, HasFullFP16]>;
5012 def VCEQhq : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
5014 Requires<[HasNEON, HasFullFP16]>;
5016 let TwoOperandAliasConstraint = "$Vm = $Vd" in
5017 defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
5018 "$Vd, $Vm, #0", NEONvceqz>;
5020 // VCGE : Vector Compare Greater Than or Equal
5021 defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5022 IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
5023 defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5024 IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
5025 def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
5027 def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
5029 def VCGEhd : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
5031 Requires<[HasNEON, HasFullFP16]>;
5032 def VCGEhq : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
5034 Requires<[HasNEON, HasFullFP16]>;
5036 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
5037 defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
5038 "$Vd, $Vm, #0", NEONvcgez>;
5039 defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
5040 "$Vd, $Vm, #0", NEONvclez>;
5043 // VCGT : Vector Compare Greater Than
5044 defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5045 IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
5046 defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5047 IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
5048 def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
5050 def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
5052 def VCGThd : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
5054 Requires<[HasNEON, HasFullFP16]>;
5055 def VCGThq : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
5057 Requires<[HasNEON, HasFullFP16]>;
5059 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
5060 defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
5061 "$Vd, $Vm, #0", NEONvcgtz>;
5062 defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
5063 "$Vd, $Vm, #0", NEONvcltz>;
5066 // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
// Each record takes a floating-point operand type and an integer result type
// of the same lane count (e.g. v2f32 -> v2i32). The f16 forms additionally
// require HasFullFP16.
5067 def VACGEfd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
5068 "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
5069 def VACGEfq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
5070 "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
5071 def VACGEhd : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
5072 "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
5073 Requires<[HasNEON, HasFullFP16]>;
5074 def VACGEhq : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
5075 "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
5076 Requires<[HasNEON, HasFullFP16]>;
5077 // VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
5078 def VACGTfd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
5079 "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
5080 def VACGTfq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
5081 "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
5082 def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
5083 "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
5084 Requires<[HasNEON, HasFullFP16]>;
5085 def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
5086 "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>,
5087 Requires<[HasNEON, HasFullFP16]>;
5088 // VTST : Vector Test Bits
// Instantiated with an empty data-type string and NEONvtst as the operator.
5089 defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
5090 IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
// Assembly aliases: vaclt / vacle get no records of their own. Each one maps
// onto VACGT / VACGE with the $Vn and $Vm operands exchanged.
5092 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
5093 (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5094 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
5095 (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5096 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
5097 (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5098 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
5099 (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5100 let Predicates = [HasNEON, HasFullFP16] in {
5101 def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
5102 (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5103 def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
5104 (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5105 def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
5106 (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5107 def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
5108 (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5111 // +fp16fml Floating Point Multiplication Variants
5112 let Predicates = [HasNEON, HasFP16FML], DecoderNamespace= "VFPV8" in {
5114 class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn,
5115 RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
5116 : N3VCP8<op1, op2, 1, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
5117 asm, "f16", "$Vd, $Vn, $Vm", "", []>;
5119 class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
5120 RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
5121 : N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
5122 asm, "f16", "$Vd, $Vn, $Vm", "", []>;
5124 class VFMQ0<string opc, bits<2> S>
5125 : N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd),
5126 (ins SPR:$Vn, SPR:$Vm, VectorIndex32:$idx),
5127 IIC_VMACD, opc, "f16", "$Vd, $Vn, $Vm$idx", "", []> {
5130 let Inst{19-16} = Vn{4-1};
5131 let Inst{7} = Vn{0};
5132 let Inst{5} = Vm{0};
5133 let Inst{2-0} = Vm{3-1};
5136 class VFMQ1<string opc, bits<2> S>
5137 : N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd),
5138 (ins DPR:$Vn, DPR:$Vm, VectorIndex16:$idx),
5139 IIC_VMACD, opc, "f16", "$Vd, $Vn, $Vm$idx", "", []> {
5141 let Inst{5} = idx{1};
5142 let Inst{3} = idx{0};
5145 let hasNoSchedulingInfo = 1 in {
5147 def VFMALD : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
5148 def VFMSLD : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
5149 def VFMALQ : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
5150 def VFMSLQ : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;
5151 def VFMALDI : VFMQ0<"vfmal", 0b00>;
5152 def VFMSLDI : VFMQ0<"vfmsl", 0b01>;
5153 def VFMALQI : VFMQ1<"vfmal", 0b00>;
5154 def VFMSLQI : VFMQ1<"vfmsl", 0b01>;
5156 } // HasNEON, HasFP16FML
5159 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
5160 (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5161 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
5162 (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5163 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
5164 (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5165 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
5166 (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5167 let Predicates = [HasNEON, HasFullFP16] in {
5168 def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
5169 (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5170 def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
5171 (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5172 def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
5173 (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5174 def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
5175 (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5178 // Vector Bitwise Operations.
// vnotd / vnotq express bitwise NOT as an xor with an all-ones vector. The
// all-ones value is typed v8i8 (D-sized) or v16i8 (Q-sized) and bitconverted
// to whatever type the xor operates on.
5180 def vnotd : PatFrag<(ops node:$in),
5181 (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
5182 def vnotq : PatFrag<(ops node:$in),
5183 (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
// The register-register bitwise records below are typed on v2i32 (D) and
// v4i32 (Q) only, and take no data-type suffix string. The trailing 1 is
// presumably the "commutable" flag -- confirm against N3VDX / N3VQX.
5186 // VAND : Vector Bitwise AND
5187 def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
5188 v2i32, v2i32, and, 1>;
5189 def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
5190 v4i32, v4i32, and, 1>;
5192 // VEOR : Vector Bitwise Exclusive OR
5193 def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
5194 v2i32, v2i32, xor, 1>;
5195 def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
5196 v4i32, v4i32, xor, 1>;
5198 // VORR : Vector Bitwise OR
5199 def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
5200 v2i32, v2i32, or, 1>;
5201 def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
5202 v4i32, v4i32, or, 1>;
5204 def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
5205 (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
5207 "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
5209 (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
5210 let Inst{9} = SIMM{9};
5213 def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
5214 (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
5216 "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
5218 (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
5219 let Inst{10-9} = SIMM{10-9};
5222 def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
5223 (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
5225 "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
5227 (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
5228 let Inst{9} = SIMM{9};
5231 def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
5232 (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
5234 "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
5236 (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
5237 let Inst{10-9} = SIMM{10-9};
5241 // VBIC : Vector Bitwise Bit Clear (AND NOT)
5242 let TwoOperandAliasConstraint = "$Vn = $Vd" in {
5243 def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
5244 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
5245 "vbic", "$Vd, $Vn, $Vm", "",
5246 [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
5247 (vnotd DPR:$Vm))))]>;
5248 def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
5249 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
5250 "vbic", "$Vd, $Vn, $Vm", "",
5251 [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
5252 (vnotq QPR:$Vm))))]>;
5255 def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
5256 (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
5258 "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
5260 (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
5261 let Inst{9} = SIMM{9};
5264 def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
5265 (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
5267 "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
5269 (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
5270 let Inst{10-9} = SIMM{10-9};
5273 def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
5274 (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
5276 "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
5278 (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
5279 let Inst{9} = SIMM{9};
5282 def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
5283 (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
5285 "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
5287 (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
5288 let Inst{10-9} = SIMM{10-9};
// VORN : Vector Bitwise OR NOT
// Each form selects (or Vn, (vnot* Vm)): the D form is typed v2i32 and uses
// vnotd, the Q form is typed v4i32 and uses vnotq.
def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1,
                 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
                 N3RegFrm, IIC_VBINiD,
                 "vorn", "$Vd, $Vn, $Vm", "",
                 [(set DPR:$Vd, (v2i32 (or DPR:$Vn, (vnotd DPR:$Vm))))]>;
def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1,
                 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
                 N3RegFrm, IIC_VBINiQ,
                 "vorn", "$Vd, $Vn, $Vm", "",
                 [(set QPR:$Vd, (v4i32 (or QPR:$Vn, (vnotq QPR:$Vm))))]>;
5303 // VMVN : Vector Bitwise NOT (Immediate)
5305 let isReMaterializable = 1 in {
5307 def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
5308 (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
5309 "vmvn", "i16", "$Vd, $SIMM", "",
5310 [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
5311 let Inst{9} = SIMM{9};
5314 def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
5315 (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
5316 "vmvn", "i16", "$Vd, $SIMM", "",
5317 [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
5318 let Inst{9} = SIMM{9};
5321 def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
5322 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
5323 "vmvn", "i32", "$Vd, $SIMM", "",
5324 [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
5325 let Inst{11-8} = SIMM{11-8};
5328 def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
5329 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
5330 "vmvn", "i32", "$Vd, $SIMM", "",
5331 [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
5332 let Inst{11-8} = SIMM{11-8};
// VMVN : Vector Bitwise NOT
// Register-to-register forms; the selection patterns are (vnotd Vm) typed
// v2i32 for the D form and (vnotq Vm) typed v4i32 for the Q form.
def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                 (outs DPR:$Vd), (ins DPR:$Vm),
                 IIC_VSUBiD,
                 "vmvn", "$Vd, $Vm", "",
                 [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
// NOTE(review): the Q form uses the same IIC_VSUBiD itinerary as the D form,
// unlike the other D/Q pairs nearby which use a separate Q itinerary --
// confirm this is intended.
def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                 (outs QPR:$Vd), (ins QPR:$Vm),
                 IIC_VSUBiD,
                 "vmvn", "$Vd, $Vm", "",
                 [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
// Explicit selection patterns mapping vnotd/vnotq to VMVNd/VMVNq.
// Guarded by HasNEON, like the VBSL patterns in this file, so that they are
// never used to select a NEON instruction on a subtarget without NEON.
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>,
      Requires<[HasNEON]>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>,
      Requires<[HasNEON]>;
5348 // VBSL : Vector Bitwise Select
5349 def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
5350 (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5351 N3RegFrm, IIC_VCNTiD,
5352 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5354 (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
// Select VBSLd for int_arm_neon_vbsl on each D-register vector type. The
// patterns are identical apart from the value type, so they are generated
// with a loop; every pattern requires HasNEON.
foreach VT = [v8i8, v4i16, v2i32, v2f32, v1i64] in
def : Pat<(VT (int_arm_neon_vbsl (VT DPR:$src1),
                                 (VT DPR:$Vn), (VT DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;
// Select VBSLd for the open-coded form
//   (or (and Vn, Vd), (and Vm, (vnotd Vd)))
// for both the v2i32 and v1i64 result types; every pattern requires HasNEON.
foreach VT = [v2i32, v1i64] in
def : Pat<(VT (or (and DPR:$Vn, DPR:$Vd),
                  (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON]>;
5386 def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
5387 (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5388 N3RegFrm, IIC_VCNTiQ,
5389 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5391 (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
// Select VBSLq for int_arm_neon_vbsl on each Q-register vector type. The
// patterns are identical apart from the value type, so they are generated
// with a loop; every pattern requires HasNEON.
foreach VT = [v16i8, v8i16, v4i32, v4f32, v2i64] in
def : Pat<(VT (int_arm_neon_vbsl (VT QPR:$src1),
                                 (VT QPR:$Vn), (VT QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;
// Select VBSLq for the open-coded form
//   (or (and Vn, Vd), (and Vm, (vnotq Vd)))
// for both the v4i32 and v2i64 result types; every pattern requires HasNEON.
foreach VT = [v4i32, v2i64] in
def : Pat<(VT (or (and QPR:$Vn, QPR:$Vd),
                  (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON]>;
5423 // VBIF : Vector Bitwise Insert if False
5424 // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
5425 // FIXME: This instruction's encoding MAY NOT BE correct.
5426 def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
5427 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5428 N3RegFrm, IIC_VBINiD,
5429 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5431 def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
5432 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5433 N3RegFrm, IIC_VBINiQ,
5434 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5437 // VBIT : Vector Bitwise Insert if True
5438 // like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
5439 // FIXME: This instruction's encoding MAY NOT BE correct.
5440 def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
5441 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5442 N3RegFrm, IIC_VBINiD,
5443 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5445 def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
5446 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5447 N3RegFrm, IIC_VBINiQ,
5448 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5451 // VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
5452 // for equivalent operations with different register constraints; it just
// Vector Absolute Differences.

// VABD : Vector Absolute Difference
// Signed and unsigned integer forms, selected from int_arm_neon_vabds and
// int_arm_neon_vabdu respectively.
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D,
                        IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D,
                        IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vabd", "u", int_arm_neon_vabdu, 1>;
// Floating-point forms; these also select from int_arm_neon_vabds.
def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm,
                     IIC_VBIND, "vabd", "f32",
                     v2f32, v2f32, int_arm_neon_vabds, 1>;
def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm,
                     IIC_VBINQ, "vabd", "f32",
                     v4f32, v4f32, int_arm_neon_vabds, 1>;
// The f16 forms additionally require HasFullFP16.
def VABDhd : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm,
                     IIC_VBIND, "vabd", "f16",
                     v4f16, v4f16, int_arm_neon_vabds, 1>,
             Requires<[HasNEON, HasFullFP16]>;
def VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm,
                     IIC_VBINQ, "vabd", "f16",
                     v8f16, v8f16, int_arm_neon_vabds, 1>,
             Requires<[HasNEON, HasFullFP16]>;
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
// Both the signed and the unsigned variant pass zext as the extension
// operator to N3VLIntExt_QHS.
defm VABDLs : N3VLIntExt_QHS<0, 1, 0b0111, 0,
                             IIC_VSUBi4Q, "vabdl", "s",
                             int_arm_neon_vabds, zext, 1>;
defm VABDLu : N3VLIntExt_QHS<1, 1, 0b0111, 0,
                             IIC_VSUBi4Q, "vabdl", "u",
                             int_arm_neon_vabdu, zext, 1>;
// Select VABDLu for abs(zext(a) - zext(b)) with v8i16 and v4i32 results.
// Guarded by HasNEON, like the VBSL patterns in this file, so that they are
// never used to select a NEON instruction on a subtarget without NEON.
def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
          (VABDLuv8i16 DPR:$opA, DPR:$opB)>,
      Requires<[HasNEON]>;
def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
          (VABDLuv4i32 DPR:$opA, DPR:$opB)>,
      Requires<[HasNEON]>;
5486 // ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
5487 // shift/xor pattern for ABS.
5490 PatFrag<(ops node:$in1, node:$in2, node:$shift),
5491 (NEONvshrs (sub (zext node:$in1),
5492 (zext node:$in2)), (i32 $shift))>;
// Select VABDLuv2i64 from the expanded abs form: xor(s, add(d, s)), where
// d = zext(a) - zext(b) and s = abd_shr(a, b, 63), both bitcast to v4i32.
// Guarded by HasNEON, like the VBSL patterns in this file, so that it is
// never used to select a NEON instruction on a subtarget without NEON.
def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
               (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
                                                   (zext (v2i32 DPR:$opB))),
                                              (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
          (VABDLuv2i64 DPR:$opA, DPR:$opB)>,
      Requires<[HasNEON]>;
// VABA : Vector Absolute Difference and Accumulate
// Combines the vabds/vabdu intrinsic with `add`.
defm VABAs : N3VIntOp_QHS<0, 0, 0b0111, 1,
                          IIC_VABAD, IIC_VABAQ,
                          "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu : N3VIntOp_QHS<1, 0, 0b0111, 1,
                          IIC_VABAD, IIC_VABAQ,
                          "vaba", "u", int_arm_neon_vabdu, add>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
// Both variants pass zext as the extension operator and `add` as the
// accumulate operator.
defm VABALs : N3VLIntExtOp_QHS<0, 1, 0b0101, 0,
                               IIC_VABAD,
                               "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu : N3VLIntExtOp_QHS<1, 1, 0b0101, 0,
                               IIC_VABAD,
                               "vabal", "u", int_arm_neon_vabdu, zext, add>;
// Vector Maximum and Minimum.

// VMAX : Vector Maximum
// Integer forms, selected from the smax / umax nodes.
defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D,
                        IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "s", smax, 1>;
defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D,
                        IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "u", umax, 1>;
5521 def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
5523 v2f32, v2f32, fmaximum, 1>;
5524 def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5526 v4f32, v4f32, fmaximum, 1>;
5527 def VMAXhd : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
5529 v4f16, v4f16, fmaximum, 1>,
5530 Requires<[HasNEON, HasFullFP16]>;
5531 def VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5533 v8f16, v8f16, fmaximum, 1>,
5534 Requires<[HasNEON, HasFullFP16]>;
5537 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
// vmaxnm, selected from fmaxnum. All forms require HasV8 and HasNEON; the
// f16 forms additionally require HasFullFP16.
def VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                          N3RegFrm, NoItinerary,
                          "vmaxnm", "f32", v2f32, v2f32, fmaxnum, 1>,
                Requires<[HasV8, HasNEON]>;
def VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
                          N3RegFrm, NoItinerary,
                          "vmaxnm", "f32", v4f32, v4f32, fmaxnum, 1>,
                Requires<[HasV8, HasNEON]>;
def VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
                          N3RegFrm, NoItinerary,
                          "vmaxnm", "f16", v4f16, v4f16, fmaxnum, 1>,
                Requires<[HasV8, HasNEON, HasFullFP16]>;
def VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
                          N3RegFrm, NoItinerary,
                          "vmaxnm", "f16", v8f16, v8f16, fmaxnum, 1>,
                Requires<[HasV8, HasNEON, HasFullFP16]>;
// VMIN : Vector Minimum
// Integer forms, selected from the smin / umin nodes.
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D,
                        IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "s", smin, 1>;
defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D,
                        IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "u", umin, 1>;
5563 def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
5565 v2f32, v2f32, fminimum, 1>;
5566 def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5568 v4f32, v4f32, fminimum, 1>;
5569 def VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
5571 v4f16, v4f16, fminimum, 1>,
5572 Requires<[HasNEON, HasFullFP16]>;
5573 def VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5575 v8f16, v8f16, fminimum, 1>,
5576 Requires<[HasNEON, HasFullFP16]>;
5579 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
// vminnm, selected from fminnum. All forms require HasV8 and HasNEON; the
// f16 forms additionally require HasFullFP16.
def VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
                          N3RegFrm, NoItinerary,
                          "vminnm", "f32", v2f32, v2f32, fminnum, 1>,
                Requires<[HasV8, HasNEON]>;
def VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
                          N3RegFrm, NoItinerary,
                          "vminnm", "f32", v4f32, v4f32, fminnum, 1>,
                Requires<[HasV8, HasNEON]>;
def VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
                          N3RegFrm, NoItinerary,
                          "vminnm", "f16", v4f16, v4f16, fminnum, 1>,
                Requires<[HasV8, HasNEON, HasFullFP16]>;
def VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
                          N3RegFrm, NoItinerary,
                          "vminnm", "f16", v8f16, v8f16, fminnum, 1>,
                Requires<[HasV8, HasNEON, HasFullFP16]>;
5598 // Vector Pairwise Operations.
5600 // VPADD : Vector Pairwise Add
5601 def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
5603 v8i8, v8i8, int_arm_neon_vpadd, 0>;
5604 def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
5606 v4i16, v4i16, int_arm_neon_vpadd, 0>;
5607 def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
5609 v2i32, v2i32, int_arm_neon_vpadd, 0>;
// Floating-point vpadd, selected from int_arm_neon_vpadd. The f16 form
// additionally requires HasFullFP16.
def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, IIC_VPBIND,
                     "vpadd", "f32", v2f32, v2f32, int_arm_neon_vpadd, 0>;
def VPADDh : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm, IIC_VPBIND,
                     "vpadd", "f16", v4f16, v4f16, int_arm_neon_vpadd, 0>,
             Requires<[HasNEON, HasFullFP16]>;
// VPADDL : Vector Pairwise Add Long
// Signed / unsigned variants select from int_arm_neon_vpaddls / vpaddlu.
defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0,
                            "vpaddl", "s", int_arm_neon_vpaddls>;
defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0,
                            "vpaddl", "u", int_arm_neon_vpaddlu>;

// VPADAL : Vector Pairwise Add and Accumulate Long
// Signed / unsigned variants select from int_arm_neon_vpadals / vpadalu.
defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0,
                             "vpadal", "s", int_arm_neon_vpadals>;
defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0,
                             "vpadal", "u", int_arm_neon_vpadalu>;
// VPMAX : Vector Pairwise Maximum
// D-register forms only. Signed integer and floating-point forms select from
// int_arm_neon_vpmaxs, unsigned forms from int_arm_neon_vpmaxu.
def VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                       "vpmax", "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                       "vpmax", "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                       "vpmax", "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                       "vpmax", "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                       "vpmax", "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                       "vpmax", "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND,
                       "vpmax", "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
// The f16 form additionally requires HasFullFP16.
def VPMAXh   : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND,
                       "vpmax", "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
               Requires<[HasNEON, HasFullFP16]>;
// VPMIN : Vector Pairwise Minimum
// D-register forms only. Signed integer and floating-point forms select from
// int_arm_neon_vpmins, unsigned forms from int_arm_neon_vpminu.
def VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                       "vpmin", "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                       "vpmin", "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                       "vpmin", "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                       "vpmin", "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                       "vpmin", "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D,
                       "vpmin", "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND,
                       "vpmin", "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
// The f16 form additionally requires HasFullFP16.
def VPMINh   : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND,
                       "vpmin", "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
               Requires<[HasNEON, HasFullFP16]>;
// Vector Reciprocal and Reciprocal Square Root Estimate and Step.

// VRECPE : Vector Reciprocal Estimate
// All forms select from int_arm_neon_vrecpe; they differ in element type
// (u32, f32, f16) and register width (D or Q).
def VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, IIC_VUNAD,
                       "vrecpe", "u32", v2i32, v2i32, int_arm_neon_vrecpe>;
def VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, IIC_VUNAQ,
                       "vrecpe", "u32", v4i32, v4i32, int_arm_neon_vrecpe>;
def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, IIC_VUNAD,
                       "vrecpe", "f32", v2f32, v2f32, int_arm_neon_vrecpe>;
def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, IIC_VUNAQ,
                       "vrecpe", "f32", v4f32, v4f32, int_arm_neon_vrecpe>;
// The f16 forms additionally require HasFullFP16.
def VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0, IIC_VUNAD,
                       "vrecpe", "f16", v4f16, v4f16, int_arm_neon_vrecpe>,
               Requires<[HasNEON, HasFullFP16]>;
def VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0, IIC_VUNAQ,
                       "vrecpe", "f16", v8f16, v8f16, int_arm_neon_vrecpe>,
               Requires<[HasNEON, HasFullFP16]>;
// VRECPS : Vector Reciprocal Step
// f32 and f16 forms, D and Q widths, all selecting from int_arm_neon_vrecps.
def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, IIC_VRECSD,
                       "vrecps", "f32", v2f32, v2f32, int_arm_neon_vrecps, 1>;
def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, IIC_VRECSQ,
                       "vrecps", "f32", v4f32, v4f32, int_arm_neon_vrecps, 1>;
// The f16 forms additionally require HasFullFP16.
def VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm, IIC_VRECSD,
                       "vrecps", "f16", v4f16, v4f16, int_arm_neon_vrecps, 1>,
               Requires<[HasNEON, HasFullFP16]>;
def VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm, IIC_VRECSQ,
                       "vrecps", "f16", v8f16, v8f16, int_arm_neon_vrecps, 1>,
               Requires<[HasNEON, HasFullFP16]>;
// VRSQRTE : Vector Reciprocal Square Root Estimate
// All forms select from int_arm_neon_vrsqrte; they differ in element type
// (u32, f32, f16) and register width (D or Q).
def VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, IIC_VUNAD,
                        "vrsqrte", "u32", v2i32, v2i32, int_arm_neon_vrsqrte>;
def VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, IIC_VUNAQ,
                        "vrsqrte", "u32", v4i32, v4i32, int_arm_neon_vrsqrte>;
def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, IIC_VUNAD,
                        "vrsqrte", "f32", v2f32, v2f32, int_arm_neon_vrsqrte>;
def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, IIC_VUNAQ,
                        "vrsqrte", "f32", v4f32, v4f32, int_arm_neon_vrsqrte>;
// The f16 forms additionally require HasFullFP16.
def VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0, IIC_VUNAD,
                        "vrsqrte", "f16", v4f16, v4f16, int_arm_neon_vrsqrte>,
                Requires<[HasNEON, HasFullFP16]>;
def VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0, IIC_VUNAQ,
                        "vrsqrte", "f16", v8f16, v8f16, int_arm_neon_vrsqrte>,
                Requires<[HasNEON, HasFullFP16]>;
// VRSQRTS : Vector Reciprocal Square Root Step
// f32 and f16 forms, D and Q widths, all selecting from int_arm_neon_vrsqrts.
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, IIC_VRECSD,
                        "vrsqrts", "f32", v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, IIC_VRECSQ,
                        "vrsqrts", "f32", v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
// The f16 forms additionally require HasFullFP16.
def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm, IIC_VRECSD,
                        "vrsqrts", "f16", v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm, IIC_VRECSQ,
                        "vrsqrts", "f16", v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
                Requires<[HasNEON, HasFullFP16]>;
// VSHL : Vector Shift
// Register-shift forms, selected from int_arm_neon_vshifts / vshiftu.
defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                           IIC_VSHLiD, IIC_VSHLiD,
                           IIC_VSHLiQ, IIC_VSHLiQ,
                           "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                           IIC_VSHLiD, IIC_VSHLiD,
                           IIC_VSHLiQ, IIC_VSHLiQ,
                           "vshl", "u", int_arm_neon_vshiftu>;

// VSHL : Vector Shift Left (Immediate)
// Immediate form, selected from the NEONvshl node.
defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1,
                         IIC_VSHLiD, "vshl", "i", NEONvshl>;
5759 // VSHR : Vector Shift Right (Immediate)
5760 defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
5762 defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
// VSHLL : Vector Shift Left Long
// The selection fragment is NEONvshl applied to the sign-extended (s) or
// zero-extended (u) source operand.
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
                         PatFrag<(ops node:$LHS, node:$RHS),
                                 (NEONvshl (sext node:$LHS), node:$RHS)>>;
defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
                         PatFrag<(ops node:$LHS, node:$RHS),
                                 (NEONvshl (zext node:$LHS), node:$RHS)>>;
5771 // VSHLL : Vector Shift Left Long (with maximum shift count)
5772 class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
5773 bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
5774 ValueType OpTy, Operand ImmTy>
5775 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
5776 ResTy, OpTy, ImmTy, null_frag> {
5777 let Inst{21-16} = op21_16;
5778 let DecoderMethod = "DecodeVSHLMaxInstruction";
5780 def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
// Maximum-shift-count vshll for 16-bit and 32-bit source elements, built on
// the N2VLShMax class; the immediate operand types are imm16 and imm32.
def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0,
                         "vshll", "i16", v4i32, v4i16, imm16>;
def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0,
                         "vshll", "i32", v2i64, v2i32, imm32>;
// Select the maximum-shift VSHLL instructions when an extended D register is
// shifted left by exactly the source element width (8, 16 or 32). The same
// instruction is used for zext, sext and anyext sources.
// Guarded by HasNEON, like the VBSL patterns in this file, so that these
// patterns are never used to select a NEON instruction on a subtarget
// without NEON.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (NEONvshl (zext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (NEONvshl (zext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (NEONvshl (zext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (NEONvshl (sext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (NEONvshl (sext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (NEONvshl (sext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (NEONvshl (anyext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (NEONvshl (anyext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (NEONvshl (anyext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
}
// VSHRN : Vector Shift Right and Narrow
// The selection fragment is a truncate of a NEONvshrs shift.
defm VSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 0, 1,
                        IIC_VSHLiD, "vshrn", "i",
                        PatFrag<(ops node:$Rn, node:$amt),
                                (trunc (NEONvshrs node:$Rn, node:$amt))>>;
// Also select VSHRN for a truncate of a NEONvshru shift.
// Guarded by HasNEON, like the VBSL patterns in this file, so that these
// patterns are never used to select a NEON instruction on a subtarget
// without NEON.
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (trunc (NEONvshru (v8i16 QPR:$Vn), shr_imm8:$amt))),
          (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
def : Pat<(v4i16 (trunc (NEONvshru (v4i32 QPR:$Vn), shr_imm16:$amt))),
          (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
def : Pat<(v2i32 (trunc (NEONvshru (v2i64 QPR:$Vn), shr_imm32:$amt))),
          (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
}
// VRSHL : Vector Rounding Shift
// Selected from int_arm_neon_vrshifts / int_arm_neon_vrshiftu.
defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D,
                            IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D,
                            IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "u", int_arm_neon_vrshiftu>;
5825 // VRSHR : Vector Rounding Shift Right
5826 defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
5828 defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
5831 // VRSHRN : Vector Rounding Shift Right and Narrow
5832 defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
// VQSHL : Vector Saturating Shift
// Register-shift forms, selected from int_arm_neon_vqshifts / vqshiftu.
defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D,
                            IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D,
                            IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
// Immediate forms, selected from the NEONvqshls / NEONvqshlu nodes.
defm VQSHLsi : N2VShL_QHSD<0, 1, 0b0111, 1,
                           IIC_VSHLi4D, "vqshl", "s", NEONvqshls>;
defm VQSHLui : N2VShL_QHSD<1, 1, 0b0111, 1,
                           IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>;

// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
// Selected from the NEONvqshlsu node.
defm VQSHLsu : N2VShL_QHSD<1, 1, 0b0110, 1,
                           IIC_VSHLi4D, "vqshlu", "s", NEONvqshlsu>;
5849 // VQSHRN : Vector Saturating Shift Right and Narrow
5850 defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
5852 defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
5855 // VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
5856 defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
// VQRSHL : Vector Saturating Rounding Shift
// Selected from int_arm_neon_vqrshifts / int_arm_neon_vqrshiftu.
defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D,
                             IIC_VSHLi4Q, IIC_VSHLi4Q,
                             "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D,
                             IIC_VSHLi4Q, IIC_VSHLi4Q,
                             "vqrshl", "u", int_arm_neon_vqrshiftu>;
5867 // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
5868 defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
5870 defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
5873 // VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
5874 defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
// VSRA : Vector Shift Right and Accumulate
// Built from the NEONvshrs / NEONvshru shift nodes.
defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1,
                           "vsra", "s", NEONvshrs>;
defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1,
                           "vsra", "u", NEONvshru>;
// VRSRA : Vector Rounding Shift Right and Accumulate
// Built from the NEONvrshrs / NEONvrshru shift nodes.
defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1,
                            "vrsra", "s", NEONvrshrs>;
defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1,
                            "vrsra", "u", NEONvrshru>;

// VSLI : Vector Shift Left and Insert
defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1,
                           "vsli">;

// VSRI : Vector Shift Right and Insert
defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1,
                           "vsri">;
// Vector Absolute and Saturating Absolute.

// VABS : Vector Absolute Value
// Integer forms, selected from the abs node.
defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                       IIC_VUNAiD, IIC_VUNAiQ,
                       "vabs", "s", abs>;
5895 def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
5897 v2f32, v2f32, fabs>;
5898 def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
5900 v4f32, v4f32, fabs>;
5901 def VABShd : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
5903 v4f16, v4f16, fabs>,
5904 Requires<[HasNEON, HasFullFP16]>;
5905 def VABShq : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
5907 v8f16, v8f16, fabs>,
5908 Requires<[HasNEON, HasFullFP16]>;
// VQABS : Vector Saturating Absolute Value
// Selected from int_arm_neon_vqabs.
defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                        IIC_VQUNAiD, IIC_VQUNAiQ,
                        "vqabs", "s", int_arm_neon_vqabs>;
// Negation fragments: a subtraction of the operand from an all-zeros vector
// (NEONimmAllZerosV bitcast from v2i32 for D registers, v4i32 for Q).
def vnegd : PatFrag<(ops node:$in),
                    (sub (bitconvert (v2i32 NEONimmAllZerosV)),
                         node:$in)>;
def vnegq : PatFrag<(ops node:$in),
                    (sub (bitconvert (v4i32 NEONimmAllZerosV)),
                         node:$in)>;
// Integer negate on a D register. `size` is forwarded to N2V, `Ty` is the
// vector type of the result, and the pattern is (vnegd Vm).
class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
// Integer negate on a Q register; same as VNEGD but using QPR operands,
// IIC_VSHLiQ and the vnegq fragment.
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
// VNEG : Vector Negate (integer)
// One definition per element size (s8, s16, s32) and register width.
def VNEGs8d  : VNEGD<0b00, "vneg", "s8",  v8i8>;
def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def VNEGs8q  : VNEGQ<0b00, "vneg", "s8",  v16i8>;
def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
// VNEG : Vector Negate (floating-point)
// Selected from the fneg node for f32 and f16 vectors.
def VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                   (outs DPR:$Vd), (ins DPR:$Vm),
                   IIC_VUNAD, "vneg", "f32", "$Vd, $Vm", "",
                   [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                   (outs QPR:$Vd), (ins QPR:$Vm),
                   IIC_VUNAQ, "vneg", "f32", "$Vd, $Vm", "",
                   [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
// The f16 forms additionally require HasFullFP16.
def VNEGhd   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
                   (outs DPR:$Vd), (ins DPR:$Vm),
                   IIC_VUNAD, "vneg", "f16", "$Vd, $Vm", "",
                   [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
               Requires<[HasNEON, HasFullFP16]>;
def VNEGhq   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
                   (outs QPR:$Vd), (ins QPR:$Vm),
                   IIC_VUNAQ, "vneg", "f16", "$Vd, $Vm", "",
                   [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
               Requires<[HasNEON, HasFullFP16]>;
// Explicit selection patterns mapping the vnegd/vnegq fragments to the
// integer VNEG instruction of the matching element size.
// Guarded by HasNEON, like the VBSL patterns in this file, so that these
// patterns are never used to select a NEON instruction on a subtarget
// without NEON.
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
}
// VQNEG : Vector Saturating Negate
// Selected from int_arm_neon_vqneg.
defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                        IIC_VQUNAiD, IIC_VQUNAiQ,
                        "vqneg", "s", int_arm_neon_vqneg>;
5971 // Vector Bit Counting Operations.
5973 // VCLS : Vector Count Leading Sign Bits
5974 defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
5975 IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
5977 // VCLZ : Vector Count Leading Zeros
5978 defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
5979 IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
5981 // VCNT : Vector Count One Bits
5982 def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
5983 IIC_VCNTiD, "vcnt", "8",
// Q-register vcnt.8, selected from ctpop on v16i8.
def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiQ,
                    "vcnt", "8", v16i8, v16i8, ctpop>;
5990 def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
5991 (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
5992 NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
5994 def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
5995 (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
5996 NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
// Vector Move Operations.

// VMOV : Vector Move (Register)
// Assembly aliases only: "vmov Vd, Vm" is mapped to VORRd/VORRq with $Vm
// passed for both source operands.
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
6007 // VMOV : Vector Move (Immediate)
// Although VMOVs are not, strictly speaking, cheap, they are as expensive as
// their copy counterpart (VORR), so we should prefer rematerialization over
// splitting when it applies.
6012 let isReMaterializable = 1, isAsCheapAsAMove=1 in {
// vmov.i8 with an nImmSplatI8 immediate, selected from NEONvmovImm, for D
// (v8i8) and Q (v16i8) destinations.
def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1,
                         (outs DPR:$Vd), (ins nImmSplatI8:$SIMM),
                         IIC_VMOVImm, "vmov", "i8", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1,
                         (outs QPR:$Vd), (ins nImmSplatI8:$SIMM),
                         IIC_VMOVImm, "vmov", "i8", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
6022 def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
6023 (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
6024 "vmov", "i16", "$Vd, $SIMM", "",
6025 [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
6026 let Inst{9} = SIMM{9};
6029 def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
6030 (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
6031 "vmov", "i16", "$Vd, $SIMM", "",
6032 [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
6033 let Inst{9} = SIMM{9};
6036 def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
6037 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
6038 "vmov", "i32", "$Vd, $SIMM", "",
6039 [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
6040 let Inst{11-8} = SIMM{11-8};
6043 def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
6044 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
6045 "vmov", "i32", "$Vd, $SIMM", "",
6046 [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
6047 let Inst{11-8} = SIMM{11-8};
// vmov.i64 with an nImmSplatI64 immediate, selected from NEONvmovImm, for D
// (v1i64) and Q (v2i64) destinations.
def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1,
                         (outs DPR:$Vd), (ins nImmSplatI64:$SIMM),
                         IIC_VMOVImm, "vmov", "i64", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1,
                         (outs QPR:$Vd), (ins nImmSplatI64:$SIMM),
                         IIC_VMOVImm, "vmov", "i64", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
6059 def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
6060 (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
6061 "vmov", "f32", "$Vd, $SIMM", "",
6062 [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
6063 def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
6064 (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
6065 "vmov", "f32", "$Vd, $SIMM", "",
6066 [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
6067 } // isReMaterializable, isAsCheapAsAMove
6069 // Add support for the byte-replication feature, for GAS compatibility.
// For a given wider element type To, this multiclass defines four assembler
// aliases (vmov/vmvn x D/Q) that all resolve to the i8 VMOV instructions.
6070 multiclass NEONImmReplicateI8InstAlias<ValueType To> {
6071 // E.g. instructions below:
6072 // "vmov.i32 d0, #0xffffffff"
6073 // "vmov.i32 d0, #0xabababab"
6074 // "vmov.i16 d0, #0xabab"
6075 // are incorrect as written, but such cases can still be handled.
6076 // For the last two instructions, for example, it should emit:
6077 // "vmov.i8 d0, #0xab"
6078 def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6079 (VMOVv8i8 DPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
6080 def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6081 (VMOVv16i8 QPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
6082 // Also add the same support for VMVN instructions. So the instruction:
6083 // "vmvn.i32 d0, #0xabababab"
// is emitted as:
6085 // "vmov.i8 d0, #0x54"
// The vmvn aliases use the nImmVINVIReplicate operand class and still map to
// the VMOV i8 instructions.
6086 def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6087 (VMOVv8i8 DPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
6088 def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6089 (VMOVv16i8 QPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
// Instantiate the byte-replication aliases for .i16, .i32 and .i64 mnemonics.
6092 defm : NEONImmReplicateI8InstAlias<i16>;
6093 defm : NEONImmReplicateI8InstAlias<i32>;
6094 defm : NEONImmReplicateI8InstAlias<i64>;
6096 // Similar to above for types other than i8, e.g.:
6097 // "vmov.i32 d0, #0xab00ab00" -> "vmov.i16 d0, #0xab00"
6098 // "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000"
6099 // In this case we do not canonicalize VMVN to VMOV
// Parameters:
//   From      - element type of the instruction actually emitted
//   V8 / V16  - D-register / Q-register VMOV instructions for From
//   NV8 / NV16 - D-register / Q-register VMVN instructions for From
//   To        - element size written in the aliased mnemonic (".i<To.Size>")
// The vmov and vmvn aliases both take an nImmVMOVIReplicate<From, To> operand.
6100 multiclass NEONImmReplicateInstAlias<ValueType From, NeonI V8, NeonI V16,
6101 NeonI NV8, NeonI NV16, ValueType To> {
6102 def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6103 (V8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6104 def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6105 (V16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6106 def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6107 (NV8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6108 def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6109 (NV16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
// i16 replicated into .i32 and .i64 mnemonics; i32 replicated into .i64.
6112 defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
6113 VMVNv4i16, VMVNv8i16, i32>;
6114 defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
6115 VMVNv4i16, VMVNv8i16, i64>;
6116 defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32,
6117 VMVNv2i32, VMVNv4i32, i64>;
6118 // TODO: add "VMOV <-> VMVN" conversion for cases like
6119 // "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55"
6120 // "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0xff00"
6122 // On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
6123 // require zero cycles to execute so they should be used wherever possible for
6124 // setting a register to zero.
6126 // Even without these pseudo-insts we would probably end up with the correct
6127 // instruction, but we could not mark the general ones with "isAsCheapAsAMove"
6128 // since they are sometimes rather expensive (in general).
// Both pseudos take no inputs, match an all-zeros vector (NEONimmAllZerosV),
// and expand to the i32 VMOV with immediate 0 and trailing operands
// (14, zero_reg) -- presumably the "always" predicate; confirm against the
// predicate operand definition. AddedComplexity = 50 raises their selection
// priority relative to patterns with the default complexity.
6130 let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
6131 def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
6132 [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))],
6133 (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
6135 def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
6136 [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))],
6137 (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
6141 // VMOV : Vector Get Lane (move scalar to ARM core register)
// Each def moves one lane of a D register ($V) into a core register ($R).
// The lane index is scattered into the encoding: Inst{21} plus Inst{6-5} for
// 8-bit lanes, Inst{21} plus Inst{6} for 16-bit lanes, Inst{21} alone for
// 32-bit lanes.
// Signed 8-bit lane (".s8"), selected from NEONvgetlanes.
6143 def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
6144 (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
6145 IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
6146 [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
6148 let Inst{21} = lane{2};
6149 let Inst{6-5} = lane{1-0};
// Signed 16-bit lane (".s16"), selected from NEONvgetlanes.
6151 def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
6152 (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
6153 IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
6154 [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
6156 let Inst{21} = lane{1};
6157 let Inst{6} = lane{0};
// Unsigned 8-bit lane (".u8"), selected from NEONvgetlaneu.
6159 def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
6160 (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
6161 IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
6162 [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
6164 let Inst{21} = lane{2};
6165 let Inst{6-5} = lane{1-0};
// Unsigned 16-bit lane (".u16"), selected from NEONvgetlaneu.
6167 def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
6168 (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
6169 IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
6170 [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
6172 let Inst{21} = lane{1};
6173 let Inst{6} = lane{0};
// 32-bit lane (".32"), selected from a plain extractelt; only available when
// both HasVFP2 and HasFastVGETLNi32 hold.
6175 def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
6176 (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
6177 IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
6178 [(set GPR:$R, (extractelt (v2i32 DPR:$V),
6180 Requires<[HasVFP2, HasFastVGETLNi32]> {
6181 let Inst{21} = lane{0};
6183 // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
// Get-lane from a Q register: take a D subregister of $src (index computed by
// DSubReg_*_reg from the lane number) and apply the D-register get-lane to it
// with the lane number rewritten by SubReg_*_lane. Presumably these transforms
// pick the D half containing the lane and the lane's index within that half;
// confirm against their definitions.
6184 def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
6185 (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
6186 (DSubReg_i8_reg imm:$lane))),
6187 (SubReg_i8_lane imm:$lane))>;
6188 def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
6189 (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
6190 (DSubReg_i16_reg imm:$lane))),
6191 (SubReg_i16_lane imm:$lane))>;
6192 def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
6193 (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
6194 (DSubReg_i8_reg imm:$lane))),
6195 (SubReg_i8_lane imm:$lane))>;
6196 def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
6197 (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
6198 (DSubReg_i16_reg imm:$lane))),
6199 (SubReg_i16_lane imm:$lane))>;
// 32-bit extract from a Q register when VGETLNi32 is fast.
6200 def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
6201 (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
6202 (DSubReg_i32_reg imm:$lane))),
6203 (SubReg_i32_lane imm:$lane))>,
6204 Requires<[HasNEON, HasFastVGETLNi32]>;
// When VGETLNi32 is slow, extract the S subregister holding the lane instead
// and move it to the GPR class.
6205 def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
6207 (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
6208 Requires<[HasNEON, HasSlowVGETLNi32]>;
6209 def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
6211 (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
6212 Requires<[HasNEON, HasSlowVGETLNi32]>;
// f32 extracts need no instruction: constrain the source to the *_VFP2
// register class and read the S subregister directly.
6213 def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
6214 (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
6215 (SSubReg_f32_reg imm:$src2))>;
6216 def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
6217 (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
6218 (SSubReg_f32_reg imm:$src2))>;
// The v2i64 variant is disabled; only the v2f64 D-subregister extract is live.
6219 //def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
6220 // (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
6221 def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
6222 (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
6225 // VMOV : Vector Set Lane (move ARM core register to scalar)
// The incoming vector $src1 is tied to the result $V, so each instruction
// replaces one lane of a D register with the value of core register $R.
// Lane-index bits are encoded the same way as for the get-lane forms:
// Inst{21}/Inst{6-5} (8-bit), Inst{21}/Inst{6} (16-bit), Inst{21} (32-bit).
6227 let Constraints = "$src1 = $V" in {
6228 def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
6229 (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
6230 IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
6231 [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
6232 GPR:$R, imm:$lane))]> {
6233 let Inst{21} = lane{2};
6234 let Inst{6-5} = lane{1-0};
6236 def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
6237 (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
6238 IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
6239 [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
6240 GPR:$R, imm:$lane))]> {
6241 let Inst{21} = lane{1};
6242 let Inst{6} = lane{0};
// The 32-bit form requires HasVFP2 and is flagged isInsertSubreg.
6244 def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
6245 (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
6246 IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
6247 [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
6248 GPR:$R, imm:$lane))]>,
6249 Requires<[HasVFP2]> {
6250 let Inst{21} = lane{0};
6251 // This instruction is equivalent to
6252 // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
6253 let isInsertSubreg = 1;
// Set-lane on a Q register: extract a D subregister of $src1 (index from
// DSubReg_*_reg), apply the D-register set-lane with the lane number rewritten
// by SubReg_*_lane, then insert the updated D register back at the same
// subregister index.
6256 def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
6257 (v16i8 (INSERT_SUBREG QPR:$src1,
6258 (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
6259 (DSubReg_i8_reg imm:$lane))),
6260 GPR:$src2, (SubReg_i8_lane imm:$lane))),
6261 (DSubReg_i8_reg imm:$lane)))>;
6262 def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
6263 (v8i16 (INSERT_SUBREG QPR:$src1,
6264 (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
6265 (DSubReg_i16_reg imm:$lane))),
6266 GPR:$src2, (SubReg_i16_lane imm:$lane))),
6267 (DSubReg_i16_reg imm:$lane)))>;
6268 def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
6269 (v4i32 (INSERT_SUBREG QPR:$src1,
6270 (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
6271 (DSubReg_i32_reg imm:$lane))),
6272 GPR:$src2, (SubReg_i32_lane imm:$lane))),
6273 (DSubReg_i32_reg imm:$lane)))>;
// f32 inserts need no instruction: constrain the vector to the *_VFP2
// register class and write the S subregister directly.
6275 def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
6276 (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
6277 SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
6278 def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
6279 (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
6280 SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
// The v2i64 variant is disabled; only the v2f64 D-subregister insert is live.
6282 //def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
6283 // (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
6284 def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
6285 (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
// scalar_to_vector for floating-point scalars: place the scalar in the lowest
// subregister (ssub_0 / dsub_0) of an otherwise undefined vector.
6287 def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
6288 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
6289 def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
6290 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
6291 def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
6292 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
// scalar_to_vector for integer scalars in a D register: set lane 0 of an
// undefined vector from the core register.
6294 def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
6295 (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
6296 def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
6297 (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
6298 def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
6299 (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
// Q-register results: build the D-register value as above and insert it into
// an undefined Q-sized vector.
6301 def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
6302 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6303 (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
6305 def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
6306 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
6307 (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
6309 def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
6310 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
6311 (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
6314 // VDUP : Vector Duplicate (from ARM core register to all elements)
// VDUPD / VDUPQ: "vdup.<Dt> $V, $R" writing a D or Q register, selected from
// NEONvdup of an i32 core register with result vector type Ty.
6316 class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
6317 : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
6318 IIC_VMOVIS, "vdup", Dt, "$V, $R",
6319 [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
6320 class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
6321 : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
6322 IIC_VMOVIS, "vdup", Dt, "$V, $R",
6323 [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
// Only the D-register 32-bit form is gated on HasFastVDUP32; the Q-register
// 32-bit form carries no extra predicate here.
6325 def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
6326 def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
6327 def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>,
6328 Requires<[HasNEON, HasFastVDUP32]>;
6329 def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
6330 def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
6331 def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
6333 // NEONvdup patterns for uarchs with fast VDUP.32.
// An f32 value that is a bitcast of a core register is duplicated straight
// from that core register.
6334 def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
6335 Requires<[HasNEON,HasFastVDUP32]>;
6336 def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
6338 // NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
// VMOVDRR is given the same core register for both of its source operands.
6339 def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
6340 Requires<[HasNEON,HasSlowVDUP32]>;
6341 def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
6342 Requires<[HasNEON,HasSlowVDUP32]>;
6344 // VDUP : Vector Duplicate Lane (from scalar to all elements)
// VDUPLND: D-register result; source and result share vector type Ty and the
// lane operand has operand class IdxTy.
6346 class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
6347 ValueType Ty, Operand IdxTy>
6348 : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
6349 IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
6350 [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;
// VDUPLNQ: Q-register result of type ResTy from a D-register source of type
// OpTy.
// NOTE(review): the selection pattern names VectorIndex32:$lane while the ins
// list uses IdxTy:$lane (and VDUPLND uses imm:$lane) -- confirm this is
// intended for the 8- and 16-bit instantiations.
6352 class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
6353 ValueType ResTy, ValueType OpTy, Operand IdxTy>
6354 : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
6355 IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
6356 [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
6357 VectorIndex32:$lane)))]>;
6359 // Inst{19-16} is partially specified depending on the element size.
// The fixed low bits of op19_16 mark the element size ({...,1} = 8-bit,
// {..,1,0} = 16-bit, {.,1,0,0} = 32-bit); the remaining high bits carry the
// lane index.
6361 def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
6363 let Inst{19-17} = lane{2-0};
6365 def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
6367 let Inst{19-18} = lane{1-0};
6369 def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
6371 let Inst{19} = lane{0};
6373 def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
6375 let Inst{19-17} = lane{2-0};
6377 def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
6379 let Inst{19-18} = lane{1-0};
6381 def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
6383 let Inst{19} = lane{0};
// Lane duplicate within a D register of four f16 elements. The elements are
// 16 bits wide, so this must select the 16-bit-lane instruction VDUPLN16d
// (as the v8f16 lane pattern does with VDUPLN16q). VDUPLN32d is declared for
// v2i32 with a VectorIndex32 lane operand and would duplicate a 32-bit lane,
// i.e. a pair of f16 elements, instead of the requested one.
6386 def : Pat<(v4f16 (NEONvduplane (v4f16 DPR:$Vm), imm:$lane)),
6387 (VDUPLN16d DPR:$Vm, imm:$lane)>;
// f32 lane duplicates from a D register reuse the 32-bit integer instructions.
6389 def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
6390 (VDUPLN32d DPR:$Vm, imm:$lane)>;
6392 def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
6393 (VDUPLN32q DPR:$Vm, imm:$lane)>;
// Lane duplicates whose source is a Q register: extract a D subregister of
// $src (index from DSubReg_*_reg), rewrite the lane number with SubReg_*_lane,
// and use the Q-result duplicate of matching element size.
6395 def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
6396 (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
6397 (DSubReg_i8_reg imm:$lane))),
6398 (SubReg_i8_lane imm:$lane)))>;
6399 def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
6400 (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
6401 (DSubReg_i16_reg imm:$lane))),
6402 (SubReg_i16_lane imm:$lane)))>;
6403 def : Pat<(v8f16 (NEONvduplane (v8f16 QPR:$src), imm:$lane)),
6404 (v8f16 (VDUPLN16q (v4f16 (EXTRACT_SUBREG QPR:$src,
6405 (DSubReg_i16_reg imm:$lane))),
6406 (SubReg_i16_lane imm:$lane)))>;
6407 def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
6408 (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
6409 (DSubReg_i32_reg imm:$lane))),
6410 (SubReg_i32_lane imm:$lane)))>;
6411 def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
6412 (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
6413 (DSubReg_i32_reg imm:$lane))),
6414 (SubReg_i32_lane imm:$lane)))>;
// NEONvdup of a floating-point scalar register: insert the scalar at ssub_0 of
// an undefined D register, then duplicate lane 0 with the lane-duplicate
// instruction of matching element size.
6416 def : Pat<(v4f16 (NEONvdup HPR:$src)),
6417 (v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
6418 HPR:$src, ssub_0), (i32 0)))>;
6419 def : Pat<(v2f32 (NEONvdup (f32 SPR:$src))),
6420 (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
6421 SPR:$src, ssub_0), (i32 0)))>;
6422 def : Pat<(v4f32 (NEONvdup (f32 SPR:$src))),
6423 (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
6424 SPR:$src, ssub_0), (i32 0)))>;
6425 def : Pat<(v8f16 (NEONvdup HPR:$src)),
6426 (v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
6427 HPR:$src, ssub_0), (i32 0)))>;
6429 // VMOVN : Vector Narrowing Move
// Selected from the generic trunc node.
6430 defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
6431 "vmovn", "i", trunc>;
6432 // VQMOVN : Vector Saturating Narrowing Move
// Three families selected from intrinsics: "vqmovn.s*" (vqmovns),
// "vqmovn.u*" (vqmovnu) and "vqmovun.s*" (vqmovnsu).
6433 defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
6434 "vqmovn", "s", int_arm_neon_vqmovns>;
6435 defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
6436 "vqmovn", "u", int_arm_neon_vqmovnu>;
6437 defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
6438 "vqmovun", "s", int_arm_neon_vqmovnsu>;
6439 // VMOVL : Vector Lengthening Move
// The signed form is selected from sext and the unsigned form from zext.
6440 defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
6441 defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
// anyext leaves the high bits unspecified, so the unsigned lengthening move is
// used for it.
6442 def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
6443 def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
6444 def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
6446 // Vector Conversions.
6448 // VCVT : Vector Convert Between Floating-Point and Integers
// Naming: f2s/f2u = f32 to signed/unsigned i32, s2f/u2f = the reverse;
// h2s/h2u and s2h/u2h are the f16 <-> i16 equivalents. A trailing 'd' or 'q'
// selects the D-register (N2VD) or Q-register (N2VQ) form.
// 32-bit, D-register forms.
6449 def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
6450 v2i32, v2f32, fp_to_sint>;
6451 def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
6452 v2i32, v2f32, fp_to_uint>;
6453 def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
6454 v2f32, v2i32, sint_to_fp>;
6455 def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
6456 v2f32, v2i32, uint_to_fp>;
// 32-bit, Q-register forms.
6458 def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
6459 v4i32, v4f32, fp_to_sint>;
6460 def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
6461 v4i32, v4f32, fp_to_uint>;
6462 def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
6463 v4f32, v4i32, sint_to_fp>;
6464 def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
6465 v4f32, v4i32, uint_to_fp>;
// 16-bit, D-register forms; these additionally require HasFullFP16.
6467 def VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
6468 v4i16, v4f16, fp_to_sint>,
6469 Requires<[HasNEON, HasFullFP16]>;
6470 def VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
6471 v4i16, v4f16, fp_to_uint>,
6472 Requires<[HasNEON, HasFullFP16]>;
6473 def VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
6474 v4f16, v4i16, sint_to_fp>,
6475 Requires<[HasNEON, HasFullFP16]>;
6476 def VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
6477 v4f16, v4i16, uint_to_fp>,
6478 Requires<[HasNEON, HasFullFP16]>;
// 16-bit, Q-register forms; these additionally require HasFullFP16.
6480 def VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
6481 v8i16, v8f16, fp_to_sint>,
6482 Requires<[HasNEON, HasFullFP16]>;
6483 def VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
6484 v8i16, v8f16, fp_to_uint>,
6485 Requires<[HasNEON, HasFullFP16]>;
6486 def VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
6487 v8f16, v8i16, sint_to_fp>,
6488 Requires<[HasNEON, HasFullFP16]>;
6489 def VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
6490 v8f16, v8i16, uint_to_fp>,
6491 Requires<[HasNEON, HasFullFP16]>;
// VCVT_FPI: float-to-integer conversions whose mnemonic is "vcvt" followed by
// the suffix `op`. Each instantiation produces eight records: signed (IntS) and
// unsigned (IntU), D and Q register, for f32->i32 (suffix 'f') and f16->i16
// (suffix 'h'). All require HasV8 and HasNEON; the f16 forms also require
// HasFullFP16. op10_8 is passed through to the instruction format unchanged.
6494 multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
6495 SDPatternOperator IntU> {
6496 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
6497 def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6498 "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
6499 def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6500 "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
6501 def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6502 "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
6503 def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6504 "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
6505 def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6506 "s16.f16", v4i16, v4f16, IntS>,
6507 Requires<[HasV8, HasNEON, HasFullFP16]>;
6508 def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6509 "s16.f16", v8i16, v8f16, IntS>,
6510 Requires<[HasV8, HasNEON, HasFullFP16]>;
6511 def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6512 "u16.f16", v4i16, v4f16, IntU>,
6513 Requires<[HasV8, HasNEON, HasFullFP16]>;
6514 def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6515 "u16.f16", v8i16, v8f16, IntU>,
6516 Requires<[HasV8, HasNEON, HasFullFP16]>;
// Instantiations: vcvta / vcvtn / vcvtp / vcvtm, with op10_8 = 0..3.
6520 defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
6521 defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
6522 defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
6523 defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;
6525 // VCVT : Vector Convert Between Floating-Point and Fixed-Point.
// Naming: f2xs/f2xu = f32 to signed/unsigned fixed-point, xs2f/xu2f = the
// reverse; h2x*/x*2h are the f16 equivalents and require HasFullFP16.
// D-register forms use the DecodeVCVTD decoder method.
6526 let DecoderMethod = "DecodeVCVTD" in {
6527 def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
6528 v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
6529 def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
6530 v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
6531 def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
6532 v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
6533 def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
6534 v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
6535 let Predicates = [HasNEON, HasFullFP16] in {
6536 def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
6537 v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
6538 def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
6539 v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
6540 def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
6541 v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
6542 def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
6543 v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
6544 } // Predicates = [HasNEON, HasFullFP16]
// Q-register forms use the DecodeVCVTQ decoder method.
6547 let DecoderMethod = "DecodeVCVTQ" in {
6548 def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
6549 v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
6550 def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
6551 v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
6552 def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
6553 v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
6554 def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
6555 v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
6556 let Predicates = [HasNEON, HasFullFP16] in {
6557 def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
6558 v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
6559 def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
6560 v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
6561 def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
6562 v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
6563 def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
6564 v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
6565 } // Predicates = [HasNEON, HasFullFP16]
// Assembler aliases: a "vcvt" written with an explicit trailing "#0" operand is
// accepted and mapped to the corresponding plain integer <-> floating-point
// conversion instruction.
// 32-bit, D-register forms.
6568 def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
6569 (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
6570 def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
6571 (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
6572 def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
6573 (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
6574 def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
6575 (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
// 32-bit, Q-register forms.
6577 def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
6578 (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
6579 def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
6580 (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
6581 def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
6582 (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
6583 def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
6584 (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
// 16-bit, D-register forms.
6586 def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
6587 (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
6588 def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
6589 (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
6590 def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
6591 (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
6592 def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
6593 (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
// 16-bit, Q-register forms.
6595 def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
6596 (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
6597 def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
6598 (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
6599 def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
6600 (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
6601 def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
6602 (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
6605 // VCVT : Vector Convert Between Half-Precision and Single-Precision.
// Both directions are selected from intrinsics and require HasNEON and
// HasFP16. The half-precision side is typed as v4i16 (not v4f16) in these
// records.
6606 def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
6607 IIC_VUNAQ, "vcvt", "f16.f32",
6608 v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
6609 Requires<[HasNEON, HasFP16]>;
6610 def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
6611 IIC_VUNAQ, "vcvt", "f32.f16",
6612 v4f32, v4i16, int_arm_neon_vcvthf2fp>,
6613 Requires<[HasNEON, HasFP16]>;
6617 // VREV64 : Vector Reverse elements within 64-bit doublewords
// VREV64D / VREV64Q: one-source, one-result D or Q register forms selected
// from NEONvrev64 on vector type Ty; op19_18 carries the element-size field.
6619 class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6620 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
6621 (ins DPR:$Vm), IIC_VMOVD,
6622 OpcodeStr, Dt, "$Vd, $Vm", "",
6623 [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
6624 class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6625 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
6626 (ins QPR:$Vm), IIC_VMOVQ,
6627 OpcodeStr, Dt, "$Vd, $Vm", "",
6628 [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;
// op19_18 = 0b00 / 0b01 / 0b10 for 8- / 16- / 32-bit elements.
6630 def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
6631 def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
6632 def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
// Floating-point vectors reuse the integer instruction of the same lane width.
6633 def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
6635 def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
6636 def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
6637 def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
6638 def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
6639 def : Pat<(v8f16 (NEONvrev64 (v8f16 QPR:$Vm))), (VREV64q16 QPR:$Vm)>;
6640 def : Pat<(v4f16 (NEONvrev64 (v4f16 DPR:$Vm))), (VREV64d16 DPR:$Vm)>;
6642 // VREV32 : Vector Reverse elements within 32-bit words
// Same shape as the VREV64 classes, selected from NEONvrev32; only 8- and
// 16-bit element sizes are instantiated.
6644 class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6645 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
6646 (ins DPR:$Vm), IIC_VMOVD,
6647 OpcodeStr, Dt, "$Vd, $Vm", "",
6648 [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
6649 class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6650 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
6651 (ins QPR:$Vm), IIC_VMOVQ,
6652 OpcodeStr, Dt, "$Vd, $Vm", "",
6653 [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;
6655 def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
6656 def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
6658 def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
6659 def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
6661 // VREV16 : Vector Reverse elements within 16-bit halfwords
// Same shape as the VREV64 classes, selected from NEONvrev16; only the 8-bit
// element size is instantiated.
6663 class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6664 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
6665 (ins DPR:$Vm), IIC_VMOVD,
6666 OpcodeStr, Dt, "$Vd, $Vm", "",
6667 [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
6668 class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6669 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
6670 (ins QPR:$Vm), IIC_VMOVQ,
6671 OpcodeStr, Dt, "$Vd, $Vm", "",
6672 [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;
6674 def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
6675 def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
6677 // Other Vector Shuffles.
6679 // Aligned extractions: really just dropping registers
// AlignedVEXTq: a vector_extract_subvec of a Q register (SrcTy) producing a
// D-sized vector (DestTy) becomes an EXTRACT_SUBREG; LaneCVT turns the start
// element index into the subregister index.
6681 class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
6682 : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
6683 (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;
// One instantiation per Q -> D vector type pair; v2f32 reuses the i32 transform.
6685 def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
6687 def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
6689 def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
6691 def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
6693 def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
6696 // VEXT : Vector Extract
6699 // All of these have a two-operand InstAlias.
6700 let TwoOperandAliasConstraint = "$Vn = $Vd" in {
// VEXTd: D-register form, "<op>.<Dt> $Vd, $Vn, $Vm, $index", selected from
// NEONvext on type Ty. The index operand has operand class immTy and is
// encoded by default in Inst{10-8}.
6701 class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
6702 : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
6703 (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
6704 IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
6705 [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
6706 (Ty DPR:$Vm), imm:$index)))]> {
6709 let Inst{10-8} = index{2-0};
// VEXTq: Q-register form; the index is encoded by default in Inst{11-8}.
// NOTE(review): the ins list uses imm0_15 directly and does not reference the
// immTy parameter -- confirm whether the narrower operand classes passed by
// VEXTq16/32/64 are meant to take effect.
6712 class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
6713 : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
6714 (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
6715 IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
6716 [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
6717 (Ty QPR:$Vm), imm:$index)))]> {
6719 let Inst{11-8} = index{3-0};
// Per-size defs override the index placement: for wider elements the index is
// shifted into the upper bits of the field.
6723 def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
6724 let Inst{10-8} = index{2-0};
6726 def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
6727 let Inst{10-9} = index{1-0};
// v4f16 reuses the 16-bit integer instruction.
6730 def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
6731 (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
6733 def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
6734 let Inst{10} = index{0};
6735 let Inst{9-8} = 0b00;
// v2f32 reuses the 32-bit integer instruction.
6737 def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
6738 (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
6740 def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
6741 let Inst{11-8} = index{3-0};
6743 def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
6744 let Inst{11-9} = index{2-0};
// v8f16 reuses the 16-bit integer instruction.
6747 def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
6748 (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
6750 def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
6751 let Inst{11-10} = index{1-0};
6752 let Inst{9-8} = 0b00;
6754 def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
6755 let Inst{11} = index{0};
6756 let Inst{10-8} = 0b000;
// v4f32 reuses the 32-bit integer instruction.
6758 def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
6759 (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
6761 // VTRN : Vector Transpose
// For all three shuffle families the first template argument selects the
// element size (0b00 / 0b01 / 0b10 for 8 / 16 / 32 bits) and the second is the
// per-operation opcode field (0b00001 vtrn, 0b00010 vuzp, 0b00011 vzip).
6763 def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
6764 def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
6765 def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
6767 def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
6768 def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
6769 def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
6771 // VUZP : Vector Unzip (Deinterleave)
6773 def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
6774 def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
6775 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
// There is therefore no VUZPd32 record; the assembler alias maps to VTRNd32.
6776 def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
6777 (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
6779 def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
6780 def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
6781 def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
6783 // VZIP : Vector Zip (Interleave)
6785 def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
6786 def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
6787 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
// There is therefore no VZIPd32 record; the assembler alias maps to VTRNd32.
6788 def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
6789 (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
6791 def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
6792 def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
6793 def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
6795 // Vector Table Lookup and Table Extension.
6797 // VTBL : Vector Table Lookup
// Every VTBL/VTBX record in this region is decoded through
// DecodeTBLInstruction.
6798 let DecoderMethod = "DecodeTBLInstruction" in {
// One-register table list (VecListOneD). This is the only VTBL form that
// carries a selection pattern directly (NEONvtbl1).
6800 : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
6801 (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
6802 "vtbl", "8", "$Vd, $Vn, $Vm", "",
6803 [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
// The multi-register table forms have an empty pattern list and are marked
// hasExtraSrcRegAllocReq.
6805 let hasExtraSrcRegAllocReq = 1 in {
// Two-register table list (VecListDPair).
6807 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
6808 (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
6809 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
// Three-register table list (VecListThreeD).
6811 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
6812 (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
6813 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
// Four-register table list (VecListFourD).
6815 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
6816 (ins VecListFourD:$Vn, DPR:$Vm),
6818 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
6819 } // hasExtraSrcRegAllocReq = 1
// Pseudo forms that take the whole table as one QQPR operand; they use the
// IIC_VTB3 and IIC_VTB4 itineraries respectively.
6822 : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
6824 : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
6826 // VTBX : Vector Table Extension
// The VTBX forms take an extra DPR:$orig input. Where the constraint string
// is visible it is "$orig = $Vd", tying that input to the destination.
// One-register table list; selected from int_arm_neon_vtbx1.
6828 : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
6829 (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
6830 "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
6831 [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
6832 DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
6833 let hasExtraSrcRegAllocReq = 1 in {
// Two-register table list (VecListDPair); no selection pattern.
6835 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
6836 (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
6837 "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
// Three-register table list (VecListThreeD).
6839 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
6840 (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
6841 NVTBLFrm, IIC_VTBX3,
6842 "vtbx", "8", "$Vd, $Vn, $Vm",
// Four-register table list (VecListFourD).
6845 : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
6846 (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
6847 "vtbx", "8", "$Vd, $Vn, $Vm",
6849 } // hasExtraSrcRegAllocReq = 1
// Pseudo forms taking the table as one QQPR operand, with $orig tied to $dst.
6852 : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
6853 IIC_VTBX3, "$orig = $dst", []>;
6855 : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
6856 IIC_VTBX4, "$orig = $dst", []>;
6857 } // DecoderMethod = "DecodeTBLInstruction"
// Two-register lookups: the table registers are combined into a DPair with
// REG_SEQUENCE (starting at dsub_0) and passed to VTBL2 / VTBX2.
6859 def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
6860 (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
6863 def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
6865 (v8i8 (VTBX2 v8i8:$orig,
6866 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
// Three-register lookups go through the QQPR-based pseudos; the dsub_3 slot of
// the REG_SEQUENCE is filled with IMPLICIT_DEF.
6870 def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
6871 v8i8:$Vn2, v8i8:$Vm)),
6872 (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
6875 (v8i8 (IMPLICIT_DEF)), dsub_3),
6877 def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
6878 v8i8:$Vn2, v8i8:$Vm)),
6879 (v8i8 (VTBX3Pseudo v8i8:$orig,
6880 (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
6883 (v8i8 (IMPLICIT_DEF)), dsub_3),
// Four-register lookups also build a QQPR REG_SEQUENCE for the pseudo.
6886 def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
6887 v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
6888 (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
6893 def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
6894 v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
6895 (v8i8 (VTBX4Pseudo v8i8:$orig,
6896 (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
6902 // VRINT : Vector Rounding
// Defines the NEON "vrint<op>" records for one rounding variant.
//   op    - suffix appended to "vrint" to form the mnemonic
//   op9_7 - value placed in Inst{9-7} of every variant
//   Int   - pattern operator handed to the instruction classes
// Produces Df/Qf (f32, D and Q registers) and Dh/Qh (f16, D and Q registers),
// plus assembler aliases for the two-type ".f32.f32" / ".f16.f16" spellings.
6903 multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
6904 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
// f32 variants: require HasV8 and HasNEON.
6905 def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
6906 !strconcat("vrint", op), "f32",
6907 v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
6908 let Inst{9-7} = op9_7;
6910 def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
6911 !strconcat("vrint", op), "f32",
6912 v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
6913 let Inst{9-7} = op9_7;
// f16 variants: first template argument is 0b01 instead of 0b10, and
// HasFullFP16 is additionally required.
6915 def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
6916 !strconcat("vrint", op), "f16",
6918 Requires<[HasV8, HasNEON, HasFullFP16]> {
6919 let Inst{9-7} = op9_7;
6921 def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
6922 !strconcat("vrint", op), "f16",
6924 Requires<[HasV8, HasNEON, HasFullFP16]> {
6925 let Inst{9-7} = op9_7;
// Aliases mapping the ".f32.f32" spelling onto the Df/Qf records.
6929 def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
6930 (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
6931 def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
6932 (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
// Aliases mapping the ".f16.f16" spelling onto the Dh/Qh records; gated on
// HasNEON and HasFullFP16.
6933 let Predicates = [HasNEON, HasFullFP16] in {
6934 def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
6935 (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
6936 def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
6937 (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
// One VRINT_FPI instantiation per variant. Each supplies the mnemonic suffix,
// the Inst{9-7} value and the matching int_arm_neon_vrint* intrinsic.
// Note that the op9_7 values 0b100 and 0b110 are not used here.
6941 defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
6942 defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
6943 defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
6944 defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
6945 defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
6946 defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
6948 // Cryptography instructions
// Helper classes for the crypto instructions. Every class here requires
// HasV8 and HasCrypto, uses NoItinerary, and inherits the post-encoder,
// decoder namespace and hasSideEffects = 0 settings from this `let`.
6949 let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
6950 DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
// AES operation on v16i8 built on N2VQIntXnp; mnemonic is "aes" # op.
6951 class AES<string op, bit op7, bit op6, SDPatternOperator Int>
6952 : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
6953 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
6954 Requires<[HasV8, HasCrypto]>;
// Same as AES but built on N2VQIntX2np.
6955 class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
6956 : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
6957 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
6958 Requires<[HasV8, HasCrypto]>;
// SHA operation on v4i32 built on N2VQIntXnp; mnemonic is "sha" # op.
// Note the base class receives op6 before op7.
6959 class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
6960 SDPatternOperator Int>
6961 : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
6962 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
6963 Requires<[HasV8, HasCrypto]>;
// Same as N2SHA but built on N2VQIntX2np.
6964 class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
6965 SDPatternOperator Int>
6966 : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
6967 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
6968 Requires<[HasV8, HasCrypto]>;
// SHA operation on v4i32 built on N3VQInt3np in N3RegFrm format.
6969 class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
6970 : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
6971 !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
6972 Requires<[HasV8, HasCrypto]>;
// AES instructions: AESD/AESE use the AES2Op class, AESIMC/AESMC use AES.
6975 def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
6976 def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
6977 def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
6978 def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
// SHA instructions. SHA1H, SHA1C, SHA1M and SHA1P are given null_frag, so the
// class attaches no intrinsic to them; the int_arm_neon_sha1{h,c,m,p}
// intrinsics are matched by the explicit Pat records that follow these defs.
6980 def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
6981 def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
6982 def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
6983 def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
6984 def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
6985 def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
6986 def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
6987 def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
6988 def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
6989 def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
// int_arm_neon_sha1h takes and returns an i32. The input is copied into SPR,
// wrapped with SUBREG_TO_REG for SHA1H, and an f32 sub-register of the result
// is copied back out with COPY_TO_REGCLASS.
6991 def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
6992 (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
6993 (SHA1H (SUBREG_TO_REG (i64 0),
6994 (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
// sha1c / sha1m / sha1p share one shape: the i32 $hash_e operand is copied
// into SPR and wrapped with SUBREG_TO_REG before being passed to the
// instruction alongside $hash_abcd.
6998 def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
6999 (SHA1C v4i32:$hash_abcd,
7000 (SUBREG_TO_REG (i64 0),
7001 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
7005 def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
7006 (SHA1M v4i32:$hash_abcd,
7007 (SUBREG_TO_REG (i64 0),
7008 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
7012 def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
7013 (SHA1P v4i32:$hash_abcd,
7014 (SUBREG_TO_REG (i64 0),
7015 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
7019 //===----------------------------------------------------------------------===//
7020 // NEON instructions for single-precision FP math
7021 //===----------------------------------------------------------------------===//
// Unary scalar f32 op done with a NEON D-register instruction: $a is placed
// at ssub_0 of an otherwise undefined v2f32 in DPR_VFP2, Inst is applied, and
// ssub_0 of the result is taken.
7023 class N2VSPat<SDNode OpNode, NeonI Inst>
7024 : NEONFPPat<(f32 (OpNode SPR:$a)),
7026 (v2f32 (COPY_TO_REGCLASS (Inst
7028 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7029 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
// Binary scalar f32 op: same scheme as N2VSPat with two operands, each based
// on its own undefined v2f32.
7031 class N3VSPat<SDNode OpNode, NeonI Inst>
7032 : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
7034 (v2f32 (COPY_TO_REGCLASS (Inst
7036 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7039 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7040 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
// f16 counterpart of N3VSPat: HPR operands and v4f16 vectors; ssub_0 is still
// the sub-register index used.
7042 class N3VSPatFP16<SDNode OpNode, NeonI Inst>
7043 : NEONFPPat<(f16 (OpNode HPR:$a, HPR:$b)),
7045 (v4f16 (COPY_TO_REGCLASS (Inst
7047 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
7050 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
7051 HPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
// Matches OpNode($acc, MulNode($a, $b)) on scalar f32 and maps it to a
// three-operand NEON instruction; three undefined v2f32 values appear in the
// result.
7053 class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
7054 : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
7056 (v2f32 (COPY_TO_REGCLASS (Inst
7058 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7061 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7064 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7065 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
// Integer -> f32 conversion: the GPR value is copied into SPR and used at
// ssub_0 of an undefined v2f32; the f32 result is read back with
// EXTRACT_SUBREG.
7067 class NVCVTIFPat<SDNode OpNode, NeonI Inst>
7068 : NEONFPPat<(f32 (OpNode GPR:$a)),
7069 (f32 (EXTRACT_SUBREG
7072 (v2f32 (IMPLICIT_DEF)),
7073 (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
// f32 -> integer conversion: the SPR input is inserted into an undefined
// v2f32, Inst is applied, and the i32 result is read back with EXTRACT_SUBREG.
7075 class NVCVTFIPat<SDNode OpNode, NeonI Inst>
7076 : NEONFPPat<(i32 (OpNode SPR:$a)),
7077 (i32 (EXTRACT_SUBREG
7078 (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
// Basic scalar f32 arithmetic mapped onto the D-register NEON instructions.
7082 def : N3VSPat<fadd, VADDfd>;
7083 def : N3VSPat<fsub, VSUBfd>;
7084 def : N3VSPat<fmul, VMULfd>;
// Multiply-accumulate / multiply-subtract: VMLA/VMLS require UseFPVMLx,
// while VFMA/VFMS require HasVFP4 and UseFusedMAC. All four also require
// UseNEONForFP.
7085 def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
7086 Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
7087 def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
7088 Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
7089 def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
7090 Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
7091 def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
7092 Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
7093 def : N2VSPat<fabs, VABSfd>;
7094 def : N2VSPat<fneg, VNEGfd>;
// fmaximum / fminimum: the f16 forms require HasFullFP16, the f32 forms
// require only HasNEON.
7095 def : N3VSPatFP16<fmaximum, VMAXhd>, Requires<[HasFullFP16]>;
7096 def : N3VSPatFP16<fminimum, VMINhd>, Requires<[HasFullFP16]>;
7097 def : N3VSPat<fmaximum, VMAXfd>, Requires<[HasNEON]>;
7098 def : N3VSPat<fminimum, VMINfd>, Requires<[HasNEON]>;
// Scalar f32 <-> i32 conversions, signed and unsigned.
7099 def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
7100 def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
7101 def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
7102 def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;
7104 // NEON doesn't have any f64 conversions, so provide patterns to make
7105 // sure the VFP conversions match when extracting from a vector.
// Signed: the selected lane of a v2i32 (DPR) or v4i32 (QPR) is addressed as an
// S sub-register via SSubReg_f32_reg and fed straight to VSITOD.
7106 def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
7107 (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7108 def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
7109 (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
// Unsigned: same shape, using VUITOD.
7110 def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
7111 (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7112 def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
7113 (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7116 // Prefer VMOVDRR for i32 -> f32 bitcasts because it can write all DPR registers.
// Generic i32 -> f32 bitconvert: the GPR is passed as both operands of
// VMOVDRR and the f32 is read from ssub_0. Only used when HasNEON and
// DontUseVMOVSR hold.
7117 def : Pat<(f32 (bitconvert GPR:$a)),
7118 (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
7119 Requires<[HasNEON, DontUseVMOVSR]>;
// Same lowering for the arm_vmovsr node, under the same predicates.
7120 def : Pat<(arm_vmovsr GPR:$a),
7121 (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
7122 Requires<[HasNEON, DontUseVMOVSR]>;
7124 //===----------------------------------------------------------------------===//
7125 // Non-Instruction Patterns
7126 //===----------------------------------------------------------------------===//
// Bitconvert patterns whose result is simply the source register retyped to
// the destination type; no instruction appears on the result side. Several
// groups below are opened with `let Predicates = [IsLE] in {`.
// 64-bit (DPR) types.
7129 let Predicates = [IsLE] in {
7130 def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
7131 def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
7132 def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
7134 def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
7135 let Predicates = [IsLE] in {
7136 def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
7137 def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
7138 def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
7139 def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
7140 def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
7142 def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
7143 let Predicates = [IsLE] in {
7144 def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
7145 def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
7146 def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
7147 def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
7148 def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
7149 def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
7150 def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
7151 def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
7152 def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
7153 def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
7155 def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
7156 let Predicates = [IsLE] in {
7157 def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
7158 def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
7159 def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (f64 DPR:$src)>;
7160 def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
7161 def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
7162 def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
7163 def : Pat<(v4f16 (bitconvert (f64 DPR:$src))), (v4f16 DPR:$src)>;
7164 def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
7166 def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
7167 let Predicates = [IsLE] in {
7168 def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
7169 def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
// 128-bit (QPR) types.
7172 let Predicates = [IsLE] in {
7173 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
7174 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
7175 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
7177 def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
7178 let Predicates = [IsLE] in {
7179 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
7180 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
7181 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
7182 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
7183 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
7185 def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
7186 let Predicates = [IsLE] in {
7187 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
7188 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
7189 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
7190 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
7191 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
7192 def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
7193 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
7194 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
7195 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
7196 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
7197 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
7198 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
7200 def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
7201 let Predicates = [IsLE] in {
7202 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
7203 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
7204 def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
7206 def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
7207 let Predicates = [IsLE] in {
7208 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
7209 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
7210 def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
7211 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
7212 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
// Big-endian bitconverts between types of different element size are
// implemented with a VREV instruction. In every pattern below the instruction
// is VREV<W><d|q><N>, where W is the larger and N the smaller of the two
// element widths (f64 and v1i64 count as 64-bit); `d` is used for DPR and `q`
// for QPR operands.
7215 let Predicates = [IsBE] in {
7216 // 64 bit conversions
7217 def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
7218 def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
7219 def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
7220 def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
7221 def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
7222 def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
7223 def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
7224 def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
7225 def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
7226 def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
7227 def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
7228 def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
7229 def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
7230 def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>;
7231 def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>;
7232 def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>;
7233 def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>;
7234 def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>;
7235 def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
7236 def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
7237 def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
7238 def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
7239 def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
7240 def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
7241 def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
7242 def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
7243 def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
7245 // 128 bit conversions
7246 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
7247 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
7248 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
7249 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
7250 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
7251 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
7252 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
7253 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
7254 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
7255 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
7256 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
7257 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
7258 def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
7259 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
7260 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>;
7261 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>;
7262 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>;
7263 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>;
7264 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>;
7265 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
7266 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
7267 def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
7268 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
7269 def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
7270 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
7271 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
7272 def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
7273 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
7274 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
7277 // Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
// Byte-aligned accesses: load/store with 8-bit elements (VLD1q8 / VST1q8) and
// apply VREV64q8 to the value.
7278 def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
7279 (VREV64q8 (VLD1q8 addrmode6:$addr))>, Requires<[IsBE]>;
7280 def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
7281 (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>, Requires<[IsBE]>;
// Halfword-aligned accesses: load/store with 16-bit elements (VLD1q16 /
// VST1q16) and apply VREV64q16 to the value.
7282 def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
7283 (VREV64q16 (VLD1q16 addrmode6:$addr))>, Requires<[IsBE]>;
7284 def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
7285 (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>, Requires<[IsBE]>;
7287 // Fold extracting an element out of a v2i32 into a vfp register.
// The i32 lane extract followed by a bitconvert to f32 becomes a plain
// EXTRACT_SUBREG of the S sub-register chosen by SSubReg_f32_reg from $lane.
7288 def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
7289 (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7291 // Vector lengthening move with load, matching extending loads.
7293 // extload, zextload and sextload for a standard lengthening load. Example:
7294 // Lengthen_Single<"8", "i16", "8"> =
7295 // Pat<(v8i16 (extloadvi8 addrmode6:$addr)),
7296 // (VMOVLuv8i16 (VLD1d8 addrmode6:$addr))>;
// Patterns for an extending vector load that is widened once.
//   DestLanes, DestTy - form the result type "v<DestLanes><DestTy>" and the
//                       VMOVL instruction name suffix
//   SrcTy             - element width suffix used in "extloadvi<SrcTy>" and
//                       "VLD1d<SrcTy>"
// _Any (extload) and _Z (zextload) use the unsigned VMOVLu*; _S (sextload)
// uses the signed VMOVLs*.
7298 multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
7299 let AddedComplexity = 10 in {
7300 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7301 (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
7302 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
7303 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
7305 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7306 (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
7307 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
7308 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
7310 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7311 (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
7312 (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
7313 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
7317 // extload, zextload and sextload for a lengthening load which only uses
7318 // half the lanes available. Example:
7319 // Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16"> =
7320 // Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
7321 // (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
7322 // (f64 (IMPLICIT_DEF)), (i32 0))),
// Patterns for an extending load whose result uses only part of the widened
// register.
//   DestLanes, DestTy - form the result type "v<DestLanes><DestTy>"
//   SrcTy             - suffix of the "{,z,s}extloadv<SrcTy>" fragment
//   InsnLanes, InsnTy - select the VMOVL{u,s}v<InsnLanes><InsnTy> instruction
// The data is loaded into lane 0 of an undefined D register with VLD1LNd32,
// widened, and the result is taken with EXTRACT_SUBREG.
7324 multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
7325 string InsnLanes, string InsnTy> {
7326 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7327 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7328 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7329 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7331 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7332 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7333 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7334 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7336 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7337 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7338 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
7339 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7343 // The following class definition is basically a copy of the
7344 // Lengthen_HalfSingle definition above, however with an additional parameter
7345 // "RevLanes" to select the correct VREV32dXX instruction. This is to convert
7346 // data loaded by VLD1LN into proper vector format in big endian mode.
// Parameters match Lengthen_HalfSingle, plus RevLanes, which names the
// VREV32d<RevLanes> instruction applied to the VLD1LNd32 result before the
// widening move.
7347 multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy,
7348 string InsnLanes, string InsnTy, string RevLanes> {
7349 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7350 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7351 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7352 (!cast<Instruction>("VREV32d" # RevLanes)
7353 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7355 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7356 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7357 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7358 (!cast<Instruction>("VREV32d" # RevLanes)
7359 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7361 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7362 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7363 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
7364 (!cast<Instruction>("VREV32d" # RevLanes)
7365 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7369 // extload, zextload and sextload for a lengthening load followed by another
7370 // lengthening load, to quadruple the initial length.
7372 // Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
7373 // Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)),
7374 // (EXTRACT_SUBREG (VMOVLuv4i32
7375 // (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
7376 // (f64 (IMPLICIT_DEF)),
// Patterns for an extending load widened twice.
//   DestLanes, DestTy   - form the result type "v<DestLanes><DestTy>"
//   SrcTy               - suffix of the "{,z,s}extloadv<SrcTy>" fragment
//   Insn1Lanes, Insn1Ty - first (inner) VMOVL instruction
//   Insn2Lanes, Insn2Ty - second (outer) VMOVL instruction
// The inner VMOVL is applied to a VLD1LNd32 lane-0 load; a sub-register of
// that result is fed to the outer VMOVL. _Any/_Z use VMOVLu for both steps,
// _S uses VMOVLs for both.
7380 multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
7381 string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7383 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7384 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7385 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7386 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7387 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7389 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7390 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7391 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7392 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7393 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7395 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7396 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7397 (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7398 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7399 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7403 // The following class definition is basically a copy of the
7404 // Lengthen_Double definition above, however with an additional parameter
7405 // "RevLanes" to select the correct VREV32dXX instruction. This is to convert
7406 // data loaded by VLD1LN into proper vector format in big endian mode.
// Parameters match Lengthen_Double, plus RevLanes, which names the
// VREV32d<RevLanes> instruction applied to the VLD1LNd32 result before the
// first widening move.
7407 multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy,
7408 string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7409 string Insn2Ty, string RevLanes> {
7410 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7411 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7412 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7413 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7414 (!cast<Instruction>("VREV32d" # RevLanes)
7415 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7417 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7418 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7419 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7420 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7421 (!cast<Instruction>("VREV32d" # RevLanes)
7422 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7424 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7425 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7426 (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7427 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7428 (!cast<Instruction>("VREV32d" # RevLanes)
7429 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7433 // extload, zextload and sextload for a lengthening load followed by another
7434 // lengthening load, to quadruple the initial length, but which ends up only
7435 // requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
7437 // Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
7438 // Pat<(v2i32 (extloadvi8 addrmode6:$addr)),
7439 // (EXTRACT_SUBREG (VMOVLuv4i32
7440 // (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
7441 // (f64 (IMPLICIT_DEF)), (i32 0))),
// Patterns for an extending load widened twice where only a sub-register of
// the final result is used. Parameters have the same roles as in
// Lengthen_Double. Unlike that multiclass, the load here is a VLD1LNd16 into
// lane 0 addressed through addrmode6, and the outer VMOVL result is itself
// wrapped in EXTRACT_SUBREG.
7444 multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
7445 string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7447 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7448 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
7449 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7450 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7451 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7454 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7455 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
7456 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7457 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7458 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7461 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7462 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
7463 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7464 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7465 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7470 // The following class definition is basically a copy of the
7471 // Lengthen_HalfDouble definition above, however with an additional VREV16d8
7472 // instruction to convert data loaded by VLD1LN into proper vector format
7473 // in big endian mode.
// Same shape as Lengthen_HalfDouble, with a fixed VREV16d8 applied to the
// VLD1LNd16 result before the first widening move. The reversal instruction
// is hard-coded here rather than chosen by a RevLanes parameter.
7474 multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy,
7475 string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7477 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7478 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
7479 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7480 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7481 (!cast<Instruction>("VREV16d8")
7482 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7485 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7486 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
7487 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7488 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7489 (!cast<Instruction>("VREV16d8")
7490 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7493 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7494 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
7495 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7496 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7497 (!cast<Instruction>("VREV16d8")
7498 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
// Single-step lengthening: one instantiation per source vector type (the
// trailing comment on each line gives the source -> destination types).
// NOTE(review): the template arguments look like (dest lanes, dest element
// type, source element bits) - confirm against the Lengthen_Single multiclass.
7503 defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
7504 defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
7505 defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
7507 let Predicates = [IsLE] in {
7508 defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
7509 defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
7511 // Double lengthening - v4i8 -> v4i16 -> v4i32
7512 defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
7513 // v2i8 -> v2i16 -> v2i32
7514 defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
7515 // v2i16 -> v2i32 -> v2i64
7516 defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
7519 let Predicates = [IsBE] in {
7520 defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
7521 defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32
7523 // Double lengthening - v4i8 -> v4i16 -> v4i32
7524 defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
7525 // v2i8 -> v2i16 -> v2i32
7526 defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
7527 // v2i16 -> v2i32 -> v2i64
7528 defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
7531 // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
7532 let Predicates = [IsLE] in {
7533 def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
7534 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7535 (VLD1LNd16 addrmode6:$addr,
7536 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
7537 def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
7538 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7539 (VLD1LNd16 addrmode6:$addr,
7540 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
7541 def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
7542 (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
7543 (VLD1LNd16 addrmode6:$addr,
7544 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
7546 // The following patterns are basically a copy of the patterns above,
7547 // however with an additional VREV16d8 instruction to convert data
7548 // loaded by VLD1LN into proper vector format in big endian mode.
7549 let Predicates = [IsBE] in {
7550 def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
7551 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7552 (!cast<Instruction>("VREV16d8")
7553 (VLD1LNd16 addrmode6:$addr,
7554 (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
7555 def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
7556 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7557 (!cast<Instruction>("VREV16d8")
7558 (VLD1LNd16 addrmode6:$addr,
7559 (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
7560 def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
7561 (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
7562 (!cast<Instruction>("VREV16d8")
7563 (VLD1LNd16 addrmode6:$addr,
7564 (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
// concat_vectors of two D registers into a 128-bit vector. Each result type
// listed here (v2i64, v4i32, v8i16, v16i8, v4f32) selects the same
// REG_SEQUENCE, which builds a QPR with $Dn in dsub_0 and $Dm in dsub_1.
7567 def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
7568 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7569 def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
7570 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7571 def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)),
7572 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7573 def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
7574 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7575 def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
7576 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7578 //===----------------------------------------------------------------------===//
7579 // Assembler aliases
// fmdhr / fmdlr are accepted as aliases for VSETLNi32 writing $Rn into
// lane 1 (fmdhr) or lane 0 (fmdlr) of $Dd.
7582 def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
7583 (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
7584 def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
7585 (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
7587 // VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
// Three-operand forms ($Vd, $Vn, $Vm). Each of vand, vbic, veor and vorr gets
// one alias for the D-register instruction and one for the Q-register
// instruction.
7588 defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
7589 (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7590 defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
7591 (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7592 defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
7593 (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7594 defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
7595 (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7596 defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
7597 (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7598 defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
7599 (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7600 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
7601 (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7602 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
7603 (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7604 // ... two-operand aliases
// Two-operand forms ($Vdn, $Vm): $Vdn is passed as both the destination and
// the first source of the underlying three-operand instruction.
// Only vand, veor and vorr are aliased in this group; there is no two-operand
// vbic alias here.
7605 defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
7606 (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
7607 defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
7608 (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
7609 defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
7610 (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
7611 defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
7612 (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
7613 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
7614 (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
7615 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
7616 (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// Immediate forms: "vand.i16" / "vand.i32" with an immediate are accepted as
// aliases for the VBIC (immediate) instructions, for both D registers
// (VBICiv4i16, VBICiv2i32) and Q registers (VBICiv8i16, VBICiv4i32).
// The immediate uses the nImmSplatNotI16 / nImmSplatNotI32 operand classes.
// NOTE(review): presumably these classes match the bitwise-inverted splat so
// that AND #imm becomes BIC #~imm - confirm in the asm parser.
7618 def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
7619 (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
7620 def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
7621 (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
7622 def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
7623 (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
7624 def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
7625 (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
7628 // VLD1 single-lane pseudo-instructions. These need special handling for
7629 // the lane index that an InstAlias can't handle, so we use these instead.
7630 def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
7631 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7633 def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
7634 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7636 def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
7637 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7640 def VLD1LNdWB_fixed_Asm_8 :
7641 NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
7642 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7644 def VLD1LNdWB_fixed_Asm_16 :
7645 NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
7646 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7648 def VLD1LNdWB_fixed_Asm_32 :
7649 NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
7650 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
// VLD1 single-lane, register-writeback (WB_register) assembly pseudos: the
// assembly string takes an extra $Rm (rGPR) after the address.
// The address operand class depends on the element size:
// .8 -> addrmode6alignNone, .16 -> addrmode6align16, .32 -> addrmode6align32.
7652 def VLD1LNdWB_register_Asm_8 :
7653 NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
7654 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7655 rGPR:$Rm, pred:$p)>;
7656 def VLD1LNdWB_register_Asm_16 :
7657 NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
7658 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7659 rGPR:$Rm, pred:$p)>;
7660 def VLD1LNdWB_register_Asm_32 :
7661 NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
7662 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7663 rGPR:$Rm, pred:$p)>;
7666 // VST1 single-lane pseudo-instructions. These need special handling for
7667 // the lane index that an InstAlias can't handle, so we use these instead.
7668 def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
7669 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7671 def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
7672 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7674 def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
7675 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7678 def VST1LNdWB_fixed_Asm_8 :
7679 NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
7680 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7682 def VST1LNdWB_fixed_Asm_16 :
7683 NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
7684 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7686 def VST1LNdWB_fixed_Asm_32 :
7687 NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
7688 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
// VST1 single-lane, register-writeback (WB_register) assembly pseudos: the
// assembly string takes an extra $Rm (rGPR) after the address.
// The address operand class depends on the element size:
// .8 -> addrmode6alignNone, .16 -> addrmode6align16, .32 -> addrmode6align32.
7690 def VST1LNdWB_register_Asm_8 :
7691 NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
7692 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7693 rGPR:$Rm, pred:$p)>;
7694 def VST1LNdWB_register_Asm_16 :
7695 NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
7696 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7697 rGPR:$Rm, pred:$p)>;
7698 def VST1LNdWB_register_Asm_32 :
7699 NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
7700 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7701 rGPR:$Rm, pred:$p)>;
7703 // VLD2 single-lane pseudo-instructions. These need special handling for
7704 // the lane index that an InstAlias can't handle, so we use these instead.
7705 def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
7706 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
7708 def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
7709 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
7711 def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
7712 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
7713 def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
7714 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
7716 def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
7717 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
7720 def VLD2LNdWB_fixed_Asm_8 :
7721 NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
7722 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
7724 def VLD2LNdWB_fixed_Asm_16 :
7725 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
7726 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
7728 def VLD2LNdWB_fixed_Asm_32 :
7729 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
7730 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
7732 def VLD2LNqWB_fixed_Asm_16 :
7733 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
7734 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
7736 def VLD2LNqWB_fixed_Asm_32 :
7737 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
7738 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
// VLD2 single-lane, register-writeback (WB_register) assembly pseudos: the
// assembly string takes an extra $Rm (rGPR) after the address.
// The "d" variants use the VecListTwoD*Indexed lists for .8/.16/.32; the "q"
// variants use the VecListTwoQ*Indexed lists and exist only for .16 and .32.
// Address operand class by element size:
// .8 -> addrmode6align16, .16 -> addrmode6align32, .32 -> addrmode6align64.
7740 def VLD2LNdWB_register_Asm_8 :
7741 NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
7742 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
7743 rGPR:$Rm, pred:$p)>;
7744 def VLD2LNdWB_register_Asm_16 :
7745 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
7746 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
7747 rGPR:$Rm, pred:$p)>;
7748 def VLD2LNdWB_register_Asm_32 :
7749 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
7750 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
7751 rGPR:$Rm, pred:$p)>;
7752 def VLD2LNqWB_register_Asm_16 :
7753 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
7754 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
7755 rGPR:$Rm, pred:$p)>;
7756 def VLD2LNqWB_register_Asm_32 :
7757 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
7758 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
7759 rGPR:$Rm, pred:$p)>;
7762 // VST2 single-lane pseudo-instructions. These need special handling for
7763 // the lane index that an InstAlias can't handle, so we use these instead.
7764 def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
7765 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
7767 def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
7768 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
7770 def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
7771 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
7773 def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
7774 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
7776 def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
7777 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
7780 def VST2LNdWB_fixed_Asm_8 :
7781 NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
7782 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
7784 def VST2LNdWB_fixed_Asm_16 :
7785 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
7786 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
7788 def VST2LNdWB_fixed_Asm_32 :
7789 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
7790 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
7792 def VST2LNqWB_fixed_Asm_16 :
7793 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
7794 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
7796 def VST2LNqWB_fixed_Asm_32 :
7797 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
7798 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
// VST2 single-lane, register-writeback (WB_register) assembly pseudos: the
// assembly string takes an extra $Rm (rGPR) after the address.
// The "d" variants use the VecListTwoD*Indexed lists for .8/.16/.32; the "q"
// variants use the VecListTwoQ*Indexed lists and exist only for .16 and .32.
// Address operand class by element size:
// .8 -> addrmode6align16, .16 -> addrmode6align32, .32 -> addrmode6align64.
7800 def VST2LNdWB_register_Asm_8 :
7801 NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
7802 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
7803 rGPR:$Rm, pred:$p)>;
7804 def VST2LNdWB_register_Asm_16 :
7805 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
7806 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
7807 rGPR:$Rm, pred:$p)>;
7808 def VST2LNdWB_register_Asm_32 :
7809 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
7810 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
7811 rGPR:$Rm, pred:$p)>;
7812 def VST2LNqWB_register_Asm_16 :
7813 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
7814 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
7815 rGPR:$Rm, pred:$p)>;
7816 def VST2LNqWB_register_Asm_32 :
7817 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
7818 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
7819 rGPR:$Rm, pred:$p)>;
7821 // VLD3 all-lanes pseudo-instructions. These need special handling for the
7822 // all-lanes list operand that an InstAlias can't handle, so we use these instead.
7823 def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
7824 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
7826 def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
7827 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
7829 def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
7830 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
7832 def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
7833 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
7835 def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
7836 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
7838 def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
7839 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
7842 def VLD3DUPdWB_fixed_Asm_8 :
7843 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
7844 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
7846 def VLD3DUPdWB_fixed_Asm_16 :
7847 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
7848 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
7850 def VLD3DUPdWB_fixed_Asm_32 :
7851 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
7852 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
7854 def VLD3DUPqWB_fixed_Asm_8 :
7855 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
7856 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
7858 def VLD3DUPqWB_fixed_Asm_16 :
7859 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
7860 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
7862 def VLD3DUPqWB_fixed_Asm_32 :
7863 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
7864 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
// VLD3 all-lanes, register-writeback (WB_register) assembly pseudos: the
// assembly string takes an extra $Rm (rGPR) after the address.
// "d" variants take a VecListThreeDAllLanes list, "q" variants a
// VecListThreeQAllLanes list. Every element size (.8/.16/.32) uses the same
// addrmode6dupalignNone address operand.
7866 def VLD3DUPdWB_register_Asm_8 :
7867 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
7868 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
7869 rGPR:$Rm, pred:$p)>;
7870 def VLD3DUPdWB_register_Asm_16 :
7871 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
7872 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
7873 rGPR:$Rm, pred:$p)>;
7874 def VLD3DUPdWB_register_Asm_32 :
7875 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
7876 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
7877 rGPR:$Rm, pred:$p)>;
7878 def VLD3DUPqWB_register_Asm_8 :
7879 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
7880 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
7881 rGPR:$Rm, pred:$p)>;
7882 def VLD3DUPqWB_register_Asm_16 :
7883 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
7884 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
7885 rGPR:$Rm, pred:$p)>;
7886 def VLD3DUPqWB_register_Asm_32 :
7887 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
7888 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
7889 rGPR:$Rm, pred:$p)>;
7892 // VLD3 single-lane pseudo-instructions. These need special handling for
7893 // the lane index that an InstAlias can't handle, so we use these instead.
7894 def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
7895 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
7897 def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
7898 (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
7900 def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
7901 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
7903 def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
7904 (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
7906 def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
7907 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
7910 def VLD3LNdWB_fixed_Asm_8 :
7911 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
7912 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
7914 def VLD3LNdWB_fixed_Asm_16 :
7915 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
7916 (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
7918 def VLD3LNdWB_fixed_Asm_32 :
7919 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
7920 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
7922 def VLD3LNqWB_fixed_Asm_16 :
7923 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
7924 (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
7926 def VLD3LNqWB_fixed_Asm_32 :
7927 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
7928 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
// VLD3 single-lane, register-writeback (WB_register) assembly pseudos: the
// assembly string takes an extra $Rm (rGPR) after the address.
// The "d" variants use the VecListThreeD*Indexed lists for .8/.16/.32; the
// "q" variants use the VecListThreeQ*Indexed lists and exist only for .16 and
// .32. All of them use the addrmode6alignNone address operand.
7930 def VLD3LNdWB_register_Asm_8 :
7931 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
7932 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
7933 rGPR:$Rm, pred:$p)>;
7934 def VLD3LNdWB_register_Asm_16 :
7935 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
7936 (ins VecListThreeDHWordIndexed:$list,
7937 addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
7938 def VLD3LNdWB_register_Asm_32 :
7939 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
7940 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
7941 rGPR:$Rm, pred:$p)>;
7942 def VLD3LNqWB_register_Asm_16 :
7943 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
7944 (ins VecListThreeQHWordIndexed:$list,
7945 addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
7946 def VLD3LNqWB_register_Asm_32 :
7947 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
7948 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
7949 rGPR:$Rm, pred:$p)>;
7951 // VLD3 multiple structure pseudo-instructions. These need special handling for
7952 // the vector operands that the normal instructions don't yet model.
7953 // FIXME: Remove these when the register classes and instructions are updated.
// Three groups follow, each with "d" (VecListThreeD) and "q" (VecListThreeQ)
// variants for .8/.16/.32, and all using the addrmode6align64 address operand:
//   *Asm_*             - no writeback:       "$list, $addr"
//   *WB_fixed_Asm_*    - fixed writeback:    "$list, $addr!"
//   *WB_register_Asm_* - register writeback: "$list, $addr, $Rm" (extra rGPR)
7954 def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
7955 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
7956 def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
7957 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
7958 def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
7959 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
7960 def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
7961 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
7962 def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
7963 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
7964 def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
7965 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
// Fixed-writeback forms ("$addr!").
7967 def VLD3dWB_fixed_Asm_8 :
7968 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
7969 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
7970 def VLD3dWB_fixed_Asm_16 :
7971 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
7972 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
7973 def VLD3dWB_fixed_Asm_32 :
7974 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
7975 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
7976 def VLD3qWB_fixed_Asm_8 :
7977 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
7978 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
7979 def VLD3qWB_fixed_Asm_16 :
7980 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
7981 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
7982 def VLD3qWB_fixed_Asm_32 :
7983 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
7984 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
// Register-writeback forms (extra rGPR:$Rm operand).
7985 def VLD3dWB_register_Asm_8 :
7986 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
7987 (ins VecListThreeD:$list, addrmode6align64:$addr,
7988 rGPR:$Rm, pred:$p)>;
7989 def VLD3dWB_register_Asm_16 :
7990 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
7991 (ins VecListThreeD:$list, addrmode6align64:$addr,
7992 rGPR:$Rm, pred:$p)>;
7993 def VLD3dWB_register_Asm_32 :
7994 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
7995 (ins VecListThreeD:$list, addrmode6align64:$addr,
7996 rGPR:$Rm, pred:$p)>;
7997 def VLD3qWB_register_Asm_8 :
7998 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
7999 (ins VecListThreeQ:$list, addrmode6align64:$addr,
8000 rGPR:$Rm, pred:$p)>;
8001 def VLD3qWB_register_Asm_16 :
8002 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
8003 (ins VecListThreeQ:$list, addrmode6align64:$addr,
8004 rGPR:$Rm, pred:$p)>;
8005 def VLD3qWB_register_Asm_32 :
8006 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
8007 (ins VecListThreeQ:$list, addrmode6align64:$addr,
8008 rGPR:$Rm, pred:$p)>;
8010 // VST3 single-lane pseudo-instructions. These need special handling for
8011 // the lane index that an InstAlias can't handle, so we use these instead.
8012 def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
8013 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8015 def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
8016 (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
8018 def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
8019 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8021 def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
8022 (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
8024 def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
8025 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8028 def VST3LNdWB_fixed_Asm_8 :
8029 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
8030 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8032 def VST3LNdWB_fixed_Asm_16 :
8033 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
8034 (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
8036 def VST3LNdWB_fixed_Asm_32 :
8037 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
8038 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8040 def VST3LNqWB_fixed_Asm_16 :
8041 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
8042 (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
8044 def VST3LNqWB_fixed_Asm_32 :
8045 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
8046 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
// VST3 single-lane, register-writeback (WB_register) assembly pseudos: the
// assembly string takes an extra $Rm (rGPR) after the address.
// The "d" variants use the VecListThreeD*Indexed lists for .8/.16/.32; the
// "q" variants use the VecListThreeQ*Indexed lists and exist only for .16 and
// .32. All of them use the addrmode6alignNone address operand.
8048 def VST3LNdWB_register_Asm_8 :
8049 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
8050 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8051 rGPR:$Rm, pred:$p)>;
8052 def VST3LNdWB_register_Asm_16 :
8053 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
8054 (ins VecListThreeDHWordIndexed:$list,
8055 addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
8056 def VST3LNdWB_register_Asm_32 :
8057 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
8058 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8059 rGPR:$Rm, pred:$p)>;
8060 def VST3LNqWB_register_Asm_16 :
8061 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
8062 (ins VecListThreeQHWordIndexed:$list,
8063 addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
8064 def VST3LNqWB_register_Asm_32 :
8065 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
8066 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8067 rGPR:$Rm, pred:$p)>;
8070 // VST3 multiple structure pseudo-instructions. These need special handling for
8071 // the vector operands that the normal instructions don't yet model.
8072 // FIXME: Remove these when the register classes and instructions are updated.
// Three groups follow, each with "d" (VecListThreeD) and "q" (VecListThreeQ)
// variants for .8/.16/.32, and all using the addrmode6align64 address operand:
//   *Asm_*             - no writeback:       "$list, $addr"
//   *WB_fixed_Asm_*    - fixed writeback:    "$list, $addr!"
//   *WB_register_Asm_* - register writeback: "$list, $addr, $Rm" (extra rGPR)
8073 def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
8074 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8075 def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
8076 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8077 def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
8078 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8079 def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
8080 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8081 def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
8082 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8083 def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
8084 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
// Fixed-writeback forms ("$addr!").
8086 def VST3dWB_fixed_Asm_8 :
8087 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
8088 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8089 def VST3dWB_fixed_Asm_16 :
8090 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
8091 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8092 def VST3dWB_fixed_Asm_32 :
8093 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
8094 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8095 def VST3qWB_fixed_Asm_8 :
8096 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
8097 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8098 def VST3qWB_fixed_Asm_16 :
8099 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
8100 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8101 def VST3qWB_fixed_Asm_32 :
8102 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
8103 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
// Register-writeback forms (extra rGPR:$Rm operand).
8104 def VST3dWB_register_Asm_8 :
8105 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
8106 (ins VecListThreeD:$list, addrmode6align64:$addr,
8107 rGPR:$Rm, pred:$p)>;
8108 def VST3dWB_register_Asm_16 :
8109 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
8110 (ins VecListThreeD:$list, addrmode6align64:$addr,
8111 rGPR:$Rm, pred:$p)>;
8112 def VST3dWB_register_Asm_32 :
8113 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
8114 (ins VecListThreeD:$list, addrmode6align64:$addr,
8115 rGPR:$Rm, pred:$p)>;
8116 def VST3qWB_register_Asm_8 :
8117 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
8118 (ins VecListThreeQ:$list, addrmode6align64:$addr,
8119 rGPR:$Rm, pred:$p)>;
8120 def VST3qWB_register_Asm_16 :
8121 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
8122 (ins VecListThreeQ:$list, addrmode6align64:$addr,
8123 rGPR:$Rm, pred:$p)>;
8124 def VST3qWB_register_Asm_32 :
8125 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
8126 (ins VecListThreeQ:$list, addrmode6align64:$addr,
8127 rGPR:$Rm, pred:$p)>;
8129 // VLD4 all-lanes pseudo-instructions. These need special handling for the
8130 // all-lanes list operand that an InstAlias can't handle, so we use these instead.
// No-writeback forms ("$list, $addr"). The operand list ends with the
// predicate operand that backs the ${p} placeholder in the mnemonic.
// The address operand class differs per data size.
def VLD4DUPdAsm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
             pred:$p)>;
def VLD4DUPdAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
             pred:$p)>;
def VLD4DUPdAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
             pred:$p)>;
def VLD4DUPqAsm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
             pred:$p)>;
def VLD4DUPqAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
             pred:$p)>;
def VLD4DUPqAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
             pred:$p)>;
// Fixed-writeback forms ("$list, $addr!"). There is no $Rm operand; the
// operand list is the register list, the address, and the predicate that
// backs the ${p} placeholder in the mnemonic.
def VLD4DUPdWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
             pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
             pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
             pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
             pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
             pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
             pred:$p)>;
// Register-writeback forms ("$list, $addr, $Rm") of the VLD4 all-lanes
// assembler pseudo-instructions. The address operand class depends on the
// data-size suffix; the D and Q variants differ only in the list operand.
def VLD4DUPdWB_register_Asm_8
    : NEONDataTypeAsmPseudoInst<
          "vld4${p}", ".8", "$list, $addr, $Rm",
          (ins VecListFourDAllLanes:$list,
               addrmode6dupalign32:$addr,
               rGPR:$Rm,
               pred:$p)>;
def VLD4DUPdWB_register_Asm_16
    : NEONDataTypeAsmPseudoInst<
          "vld4${p}", ".16", "$list, $addr, $Rm",
          (ins VecListFourDAllLanes:$list,
               addrmode6dupalign64:$addr,
               rGPR:$Rm,
               pred:$p)>;
def VLD4DUPdWB_register_Asm_32
    : NEONDataTypeAsmPseudoInst<
          "vld4${p}", ".32", "$list, $addr, $Rm",
          (ins VecListFourDAllLanes:$list,
               addrmode6dupalign64or128:$addr,
               rGPR:$Rm,
               pred:$p)>;
def VLD4DUPqWB_register_Asm_8
    : NEONDataTypeAsmPseudoInst<
          "vld4${p}", ".8", "$list, $addr, $Rm",
          (ins VecListFourQAllLanes:$list,
               addrmode6dupalign32:$addr,
               rGPR:$Rm,
               pred:$p)>;
def VLD4DUPqWB_register_Asm_16
    : NEONDataTypeAsmPseudoInst<
          "vld4${p}", ".16", "$list, $addr, $Rm",
          (ins VecListFourQAllLanes:$list,
               addrmode6dupalign64:$addr,
               rGPR:$Rm,
               pred:$p)>;
def VLD4DUPqWB_register_Asm_32
    : NEONDataTypeAsmPseudoInst<
          "vld4${p}", ".32", "$list, $addr, $Rm",
          (ins VecListFourQAllLanes:$list,
               addrmode6dupalign64or128:$addr,
               rGPR:$Rm,
               pred:$p)>;
8200 // VLD4 single-lane pseudo-instructions. These need special handling for
8201 // the lane index that an InstAlias can't handle, so we use these instead.
// No-writeback forms ("$list, $addr"). The indexed list operand class and
// the address operand class both depend on the data size; there is no
// Q-list definition for ".8" in this group. Each operand list ends with the
// predicate that backs the ${p} placeholder in the mnemonic.
def VLD4LNdAsm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
        (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
             pred:$p)>;
def VLD4LNdAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
        (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VLD4LNdAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
        (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;
def VLD4LNqAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
        (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VLD4LNqAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
        (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;
// Fixed-writeback forms ("$list, $addr!"). There is no $Rm operand; the
// operand list is the indexed register list, the address, and the predicate
// that backs the ${p} placeholder in the mnemonic.
def VLD4LNdWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
        (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
             pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
        (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
        (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
        (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
        (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;
// Register-writeback forms ("$list, $addr, $Rm") of the VLD4 single-lane
// assembler pseudo-instructions. Both the indexed list operand class and the
// address operand class vary with the data-size suffix.
def VLD4LNdWB_register_Asm_8
    : NEONDataTypeAsmPseudoInst<
          "vld4${p}", ".8", "$list, $addr, $Rm",
          (ins VecListFourDByteIndexed:$list,
               addrmode6align32:$addr,
               rGPR:$Rm,
               pred:$p)>;
def VLD4LNdWB_register_Asm_16
    : NEONDataTypeAsmPseudoInst<
          "vld4${p}", ".16", "$list, $addr, $Rm",
          (ins VecListFourDHWordIndexed:$list,
               addrmode6align64:$addr,
               rGPR:$Rm,
               pred:$p)>;
def VLD4LNdWB_register_Asm_32
    : NEONDataTypeAsmPseudoInst<
          "vld4${p}", ".32", "$list, $addr, $Rm",
          (ins VecListFourDWordIndexed:$list,
               addrmode6align64or128:$addr,
               rGPR:$Rm,
               pred:$p)>;
def VLD4LNqWB_register_Asm_16
    : NEONDataTypeAsmPseudoInst<
          "vld4${p}", ".16", "$list, $addr, $Rm",
          (ins VecListFourQHWordIndexed:$list,
               addrmode6align64:$addr,
               rGPR:$Rm,
               pred:$p)>;
def VLD4LNqWB_register_Asm_32
    : NEONDataTypeAsmPseudoInst<
          "vld4${p}", ".32", "$list, $addr, $Rm",
          (ins VecListFourQWordIndexed:$list,
               addrmode6align64or128:$addr,
               rGPR:$Rm,
               pred:$p)>;
8261 // VLD4 multiple structure pseudo-instructions. These need special handling for
8262 // the vector operands that the normal instructions don't yet model.
8263 // FIXME: Remove these when the register classes and instructions are updated.
// No-writeback forms ("$list, $addr"). All sizes share the same address
// operand class; D and Q variants differ only in the list operand. Each
// operand list ends with the predicate that backs the ${p} placeholder.
def VLD4dAsm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VLD4dAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VLD4dAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VLD4qAsm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VLD4qAsm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VLD4qAsm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
// Fixed-writeback forms ("$list, $addr!"). There is no $Rm operand; the
// operand list is the register list, the address, and the predicate that
// backs the ${p} placeholder in the mnemonic.
def VLD4dWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VLD4dWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VLD4dWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VLD4qWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VLD4qWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VLD4qWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
// Register-writeback forms ("$list, $addr, $Rm") of the VLD4 multiple
// structure assembler pseudo-instructions. Every size uses the same operand
// classes, so the definitions are generated per data-size suffix; the record
// names produced are VLD4{d,q}WB_register_Asm_{8,16,32}, D-list forms first.
foreach sz = ["8", "16", "32"] in {
  def VLD4dWB_register_Asm_#sz
      : NEONDataTypeAsmPseudoInst<
            "vld4${p}", "." # sz, "$list, $addr, $Rm",
            (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                 rGPR:$Rm, pred:$p)>;
}
foreach sz = ["8", "16", "32"] in {
  def VLD4qWB_register_Asm_#sz
      : NEONDataTypeAsmPseudoInst<
            "vld4${p}", "." # sz, "$list, $addr, $Rm",
            (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                 rGPR:$Rm, pred:$p)>;
}
8332 // VST4 single-lane pseudo-instructions. These need special handling for
8333 // the lane index that an InstAlias can't handle, so we use these instead.
// No-writeback forms ("$list, $addr"). The indexed list operand class and
// the address operand class both depend on the data size; there is no
// Q-list definition for ".8" in this group. Each operand list ends with the
// predicate that backs the ${p} placeholder in the mnemonic.
def VST4LNdAsm_8 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
        (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
             pred:$p)>;
def VST4LNdAsm_16 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
        (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VST4LNdAsm_32 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
        (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;
def VST4LNqAsm_16 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
        (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VST4LNqAsm_32 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
        (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;
// Fixed-writeback forms ("$list, $addr!"). There is no $Rm operand; the
// operand list is the indexed register list, the address, and the predicate
// that backs the ${p} placeholder in the mnemonic.
def VST4LNdWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
        (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
             pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
        (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
        (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
        (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
        (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;
// Register-writeback forms ("$list, $addr, $Rm") of the VST4 single-lane
// assembler pseudo-instructions. Both the indexed list operand class and the
// address operand class vary with the data-size suffix.
def VST4LNdWB_register_Asm_8
    : NEONDataTypeAsmPseudoInst<
          "vst4${p}", ".8", "$list, $addr, $Rm",
          (ins VecListFourDByteIndexed:$list,
               addrmode6align32:$addr,
               rGPR:$Rm,
               pred:$p)>;
def VST4LNdWB_register_Asm_16
    : NEONDataTypeAsmPseudoInst<
          "vst4${p}", ".16", "$list, $addr, $Rm",
          (ins VecListFourDHWordIndexed:$list,
               addrmode6align64:$addr,
               rGPR:$Rm,
               pred:$p)>;
def VST4LNdWB_register_Asm_32
    : NEONDataTypeAsmPseudoInst<
          "vst4${p}", ".32", "$list, $addr, $Rm",
          (ins VecListFourDWordIndexed:$list,
               addrmode6align64or128:$addr,
               rGPR:$Rm,
               pred:$p)>;
def VST4LNqWB_register_Asm_16
    : NEONDataTypeAsmPseudoInst<
          "vst4${p}", ".16", "$list, $addr, $Rm",
          (ins VecListFourQHWordIndexed:$list,
               addrmode6align64:$addr,
               rGPR:$Rm,
               pred:$p)>;
def VST4LNqWB_register_Asm_32
    : NEONDataTypeAsmPseudoInst<
          "vst4${p}", ".32", "$list, $addr, $Rm",
          (ins VecListFourQWordIndexed:$list,
               addrmode6align64or128:$addr,
               rGPR:$Rm,
               pred:$p)>;
8392 // VST4 multiple structure pseudo-instructions. These need special handling for
8393 // the vector operands that the normal instructions don't yet model.
8394 // FIXME: Remove these when the register classes and instructions are updated.
// No-writeback forms ("$list, $addr"). All sizes share the same address
// operand class; D and Q variants differ only in the list operand. Each
// operand list ends with the predicate that backs the ${p} placeholder.
def VST4dAsm_8 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VST4dAsm_16 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VST4dAsm_32 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VST4qAsm_8 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VST4qAsm_16 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VST4qAsm_32 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
// Fixed-writeback forms ("$list, $addr!"). There is no $Rm operand; the
// operand list is the register list, the address, and the predicate that
// backs the ${p} placeholder in the mnemonic.
def VST4dWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VST4dWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VST4dWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VST4qWB_fixed_Asm_8 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VST4qWB_fixed_Asm_16 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VST4qWB_fixed_Asm_32 :
    NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
// Register-writeback forms ("$list, $addr, $Rm") of the VST4 multiple
// structure assembler pseudo-instructions. Every size uses the same operand
// classes, so the definitions are generated per data-size suffix; the record
// names produced are VST4{d,q}WB_register_Asm_{8,16,32}, D-list forms first.
foreach sz = ["8", "16", "32"] in {
  def VST4dWB_register_Asm_#sz
      : NEONDataTypeAsmPseudoInst<
            "vst4${p}", "." # sz, "$list, $addr, $Rm",
            (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                 rGPR:$Rm, pred:$p)>;
}
foreach sz = ["8", "16", "32"] in {
  def VST4qWB_register_Asm_#sz
      : NEONDataTypeAsmPseudoInst<
            "vst4${p}", "." # sz, "$list, $addr, $Rm",
            (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                 rGPR:$Rm, pred:$p)>;
}
8463 // VMOV/VMVN takes an optional datatype suffix
// Register-to-register "vmov Vd, Vm" maps to VORR with $Vm supplied as both
// source operands.
defm : NEONDTAnyInstAlias<
           "vmov${p}", "$Vd, $Vm",
           (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<
           "vmov${p}", "$Vd, $Vm",
           (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

// Register-to-register "vmvn Vd, Vm" maps directly to VMVNd / VMVNq.
defm : NEONDTAnyInstAlias<
           "vmvn${p}", "$Vd, $Vm",
           (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<
           "vmvn${p}", "$Vd, $Vm",
           (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;
// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
8475 // D-register versions.
// "vcle Dd, Dn, Dm" is matched and emitted as the corresponding VCGE
// instruction with $Dn and $Dm exchanged in the result operand list.
def : NEONInstAlias<
          "vcle${p}.s8 $Dd, $Dn, $Dm",
          (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<
          "vcle${p}.s16 $Dd, $Dn, $Dm",
          (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<
          "vcle${p}.s32 $Dd, $Dn, $Dm",
          (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<
          "vcle${p}.u8 $Dd, $Dn, $Dm",
          (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<
          "vcle${p}.u16 $Dd, $Dn, $Dm",
          (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<
          "vcle${p}.u32 $Dd, $Dn, $Dm",
          (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<
          "vcle${p}.f32 $Dd, $Dn, $Dm",
          (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// The .f16 form is additionally gated on HasFullFP16.
let Predicates = [HasNEON, HasFullFP16] in {
  def : NEONInstAlias<
            "vcle${p}.f16 $Dd, $Dn, $Dm",
            (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
}
// Q-register versions.
def : NEONInstAlias<
          "vcle${p}.s8 $Qd, $Qn, $Qm",
          (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<
          "vcle${p}.s16 $Qd, $Qn, $Qm",
          (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<
          "vcle${p}.s32 $Qd, $Qn, $Qm",
          (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<
          "vcle${p}.u8 $Qd, $Qn, $Qm",
          (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<
          "vcle${p}.u16 $Qd, $Qn, $Qm",
          (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<
          "vcle${p}.u32 $Qd, $Qn, $Qm",
          (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<
          "vcle${p}.f32 $Qd, $Qn, $Qm",
          (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// The .f16 form is additionally gated on HasFullFP16.
let Predicates = [HasNEON, HasFullFP16] in {
  def : NEONInstAlias<
            "vcle${p}.f16 $Qd, $Qn, $Qm",
            (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
}
8512 // VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
8513 // D-register versions.
// "vclt Dd, Dn, Dm" is matched and emitted as the corresponding VCGT
// instruction with $Dn and $Dm exchanged in the result operand list.
def : NEONInstAlias<
          "vclt${p}.s8 $Dd, $Dn, $Dm",
          (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<
          "vclt${p}.s16 $Dd, $Dn, $Dm",
          (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<
          "vclt${p}.s32 $Dd, $Dn, $Dm",
          (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<
          "vclt${p}.u8 $Dd, $Dn, $Dm",
          (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<
          "vclt${p}.u16 $Dd, $Dn, $Dm",
          (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<
          "vclt${p}.u32 $Dd, $Dn, $Dm",
          (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<
          "vclt${p}.f32 $Dd, $Dn, $Dm",
          (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// The .f16 form is additionally gated on HasFullFP16.
let Predicates = [HasNEON, HasFullFP16] in {
  def : NEONInstAlias<
            "vclt${p}.f16 $Dd, $Dn, $Dm",
            (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
}
// Q-register versions.
def : NEONInstAlias<
          "vclt${p}.s8 $Qd, $Qn, $Qm",
          (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<
          "vclt${p}.s16 $Qd, $Qn, $Qm",
          (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<
          "vclt${p}.s32 $Qd, $Qn, $Qm",
          (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<
          "vclt${p}.u8 $Qd, $Qn, $Qm",
          (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<
          "vclt${p}.u16 $Qd, $Qn, $Qm",
          (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<
          "vclt${p}.u32 $Qd, $Qn, $Qm",
          (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<
          "vclt${p}.f32 $Qd, $Qn, $Qm",
          (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// The .f16 form is additionally gated on HasFullFP16.
let Predicates = [HasNEON, HasFullFP16] in {
  def : NEONInstAlias<
            "vclt${p}.f16 $Qd, $Qn, $Qm",
            (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
}
8550 // VSWP allows, but does not require, a type suffix.
// One alias set per register width: D registers map to VSWPd, Q registers
// to VSWPq.
defm : NEONDTAnyInstAlias<
           "vswp${p}", "$Vd, $Vm",
           (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<
           "vswp${p}", "$Vd, $Vm",
           (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;
8556 // VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
// D-register forms.
defm : NEONDTAnyInstAlias<
           "vbif${p}", "$Vd, $Vn, $Vm",
           (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<
           "vbit${p}", "$Vd, $Vn, $Vm",
           (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<
           "vbsl${p}", "$Vd, $Vn, $Vm",
           (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
// Q-register forms.
defm : NEONDTAnyInstAlias<
           "vbif${p}", "$Vd, $Vn, $Vm",
           (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<
           "vbit${p}", "$Vd, $Vn, $Vm",
           (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<
           "vbsl${p}", "$Vd, $Vn, $Vm",
           (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// "vmov Vd, #-imm" can be handled via "vmvn" (and "vmvn" likewise via "vmov").
// vmov.i32 with an immediate accepted by nImmVMOVI32Neg is emitted as VMVN.
def : NEONInstAlias<
          "vmov${p}.i32 $Vd, $imm",
          (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<
          "vmov${p}.i32 $Vd, $imm",
          (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
// vmvn.i32 with an immediate accepted by nImmVMOVI32Neg is emitted as VMOV.
def : NEONInstAlias<
          "vmvn${p}.i32 $Vd, $imm",
          (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<
          "vmvn${p}.i32 $Vd, $imm",
          (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
8580 // 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
8581 // these should restrict to just the Q register variants, but the register
8582 // classes are enough to match correctly regardless, so we keep it simple
8583 // and just use MnemonicAlias.
// Each alias maps "<op>q" to "<op>". The lists keep the original definition
// order.
foreach op = ["vbic", "vand", "veor", "vorr"] in
  def : NEONMnemonicAlias<op # "q", op>;

foreach op = ["vmov", "vmvn"] in
  def : NEONMnemonicAlias<op # "q", op>;
// Explicit versions for floating point so that the FPImm variants get
// handled early. The parser gets confused otherwise.
foreach ty = [".f32", ".f64"] in
  def : NEONMnemonicAlias<"vmovq" # ty, "vmov" # ty>;

foreach op = ["vadd", "vsub",
              "vmin", "vmax",
              "vmul",
              "vabs",
              "vshl", "vshr",
              "vcvt",
              "vcle", "vceq",
              "vzip", "vswp"] in
  def : NEONMnemonicAlias<op # "q", op>;

// vrecpeq is aliased only together with an explicit .f32 / .u32 suffix.
foreach ty = [".f32", ".u32"] in
  def : NEONMnemonicAlias<"vrecpeq" # ty, "vrecpe" # ty>;
8621 // Alias for loading floating point immediates that aren't representable
8622 // using the vmov.f32 encoding but the bitpattern is representable using
8623 // the .i32 encoding.
// "vmov.f32 Vd, #imm" with an immediate accepted by nImmVMOVI32 is emitted
// as the .i32 VMOV instruction; the Q-register alias is listed first.
def : NEONInstAlias<
          "vmov${p}.f32 $Vd, $imm",
          (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<
          "vmov${p}.f32 $Vd, $imm",
          (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;