1 //===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file describes the ARM NEON instruction set.
11 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
15 // NEON-specific Operands.
16 //===----------------------------------------------------------------------===//
// Immediate operand printed/encoded via the VMOV modified-immediate scheme.
def nModImm : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
}

// Splat immediates of various element sizes, with matching asm-parser classes.
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; }
def nImmSplatNotI16 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI16AsmOperand;
}
def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; }
def nImmSplatNotI32 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI32AsmOperand;
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}
// Asm-parser class for a replicated VMOV immediate: a From.Size-bit value
// replicated to fill a To.Size-bit element.
class nImmVMOVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let Name = "NEONi" # To.Size # "vmovi" # From.Size # "Replicate";
  let PredicateMethod = "isNEONmovReplicate<" # From.Size # ", " # To.Size # ">";
  let RenderMethod = "addNEONvmovi" # From.Size # "ReplicateOperands";
}

// As above, but for the bitwise-inverted (VMVN-style) replicated immediate.
class nImmVINVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let Name = "NEONi" # To.Size # "invi" # From.Size # "Replicate";
  let PredicateMethod = "isNEONinvReplicate<" # From.Size # ", " # To.Size # ">";
  let RenderMethod = "addNEONinvi" # From.Size # "ReplicateOperands";
}

class nImmVMOVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVMOVIAsmOperandReplicate<From, To>;
}

class nImmVINVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVINVIAsmOperandReplicate<From, To>;
}
def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}
// Lane-index operands; the ImmLeaf predicate bounds the index by the number
// of lanes of that element size in a 64-bit D register.
def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex64 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 1;
}]> {
  let ParserMatchClass = VectorIndex64Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}
// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes : RegisterOperand<DPair,
                                           "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpacedAllLanes : RegisterOperand<DPairSpc,
                                                 "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
                                           "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}
// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Load/store PatFrags that select on the memory operation's alignment.
def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;
def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                    (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;
//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
def SDTARMVSHXIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHINSIMM : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                           SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshrnImm     : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;

def NEONvrshrsImm    : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
def NEONvrshruImm    : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>;
def NEONvrshrnImm    : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>;

def NEONvqshlsImm    : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>;
def NEONvqshluImm    : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>;
def NEONvqshlsuImm   : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>;
def NEONvqshrnsImm   : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqshrnuImm   : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqshrnsuImm  : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>;

def NEONvqrshrnsImm  : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqrshrnuImm  : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;

def NEONvsliImm      : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
def NEONvsriImm      : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;

def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;

def NEONvbsl      : SDNode<"ARMISD::VBSL",
                           SDTypeProfile<1, 3, [SDTCisVec<0>,
                                                SDTCisSameAs<0, 1>,
                                                SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>]>>;

def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>,
                                         SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

def SDTARMVMULL   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisSameAs<1, 2>]>;
def NEONvmulls    : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu    : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

def SDTARMVTBL1   : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                         SDTCisVT<2, v8i8>]>;
def SDTARMVTBL2   : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                         SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
def NEONvtbl1     : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
def NEONvtbl2     : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;
// Match a VMOV modified-immediate whose decoded value is all-zeros (32-bit
// elements) or all-ones (8-bit elements), respectively.
def NEONimmAllZerosV: PatLeaf<(ARMvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

def NEONimmAllOnesV: PatLeaf<(ARMvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;
//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
// NOTE(review): the def name and itinerary lines were lost in extraction and
// restored here — confirm against upstream ARMInstrNEON.td.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                   IIC_fpLoad_m, "",
                   [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                   IIC_fpStore_m, "",
                   [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;
// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VLDQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

class VLDQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

class VLDQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
                "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb, $src = $dst">;
let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {

// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x2,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def VLD1d8  : VLD1D<{0,0,0,?}, "8",  addrmode6align64>;
def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>;

def VLD1q8  : VLD1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with address register writeback:
multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
          "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8T  : VLD1D3<{0,0,0,?}, "8",  addrmode6align64>;
def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;

def VLD1d8TPseudo  : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d16TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d32TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

def VLD1q8HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q8LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt,
          "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8Q  : VLD1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

defm VLD1d8Qwb  : VLD1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

def VLD1d8QPseudo  : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d16QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d32QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

def VLD1q8LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q8HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
800 // VLD2 : Vector Load (multiple 2-element structures)
// Base class: op11_8 selects the register layout (0b1000 pair, 0b0011 four-D,
// 0b1001 double-spaced pair, per the defs below); Inst{5-4} from Rn carry the
// alignment encoding.
801 class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
802 InstrItinClass itin, Operand AddrMode>
803 : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
804 (ins AddrMode:$Rn), itin,
805 "vld2", Dt, "$Vd, $Rn", "", []> {
807 let Inst{5-4} = Rn{5-4};
808 let DecoderMethod = "DecodeVLDST2Instruction";
811 def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
812 addrmode6align64or128>, Sched<[WriteVLD2]>;
813 def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
814 addrmode6align64or128>, Sched<[WriteVLD2]>;
815 def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
816 addrmode6align64or128>, Sched<[WriteVLD2]>;
818 def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
819 addrmode6align64or128or256>, Sched<[WriteVLD4]>;
820 def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
821 addrmode6align64or128or256>, Sched<[WriteVLD4]>;
822 def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
823 addrmode6align64or128or256>, Sched<[WriteVLD4]>;
825 def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
826 def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
827 def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
829 // ...with address register writeback:
// _fixed: post-increment by transfer size (Rm hard-wired to 0b1101);
// _register: increment by rGPR:$Rm.
830 multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
831 RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> {
832 def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
833 (ins AddrMode:$Rn), itin,
834 "vld2", Dt, "$Vd, $Rn!",
835 "$Rn.addr = $wb", []> {
836 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
837 let Inst{5-4} = Rn{5-4};
838 let DecoderMethod = "DecodeVLDST2Instruction";
840 def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
841 (ins AddrMode:$Rn, rGPR:$Rm), itin,
842 "vld2", Dt, "$Vd, $Rn, $Rm",
843 "$Rn.addr = $wb", []> {
844 let Inst{5-4} = Rn{5-4};
845 let DecoderMethod = "DecodeVLDST2Instruction";
849 defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
850 addrmode6align64or128>, Sched<[WriteVLD2]>;
851 defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
852 addrmode6align64or128>, Sched<[WriteVLD2]>;
853 defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
854 addrmode6align64or128>, Sched<[WriteVLD2]>;
856 defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
857 addrmode6align64or128or256>, Sched<[WriteVLD4]>;
858 defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
859 addrmode6align64or128or256>, Sched<[WriteVLD4]>;
860 defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
861 addrmode6align64or128or256>, Sched<[WriteVLD4]>;
863 def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
864 def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
865 def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
866 def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
867 def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
868 def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
870 // ...with double-spaced registers
871 def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2,
872 addrmode6align64or128>, Sched<[WriteVLD2]>;
873 def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
874 addrmode6align64or128>, Sched<[WriteVLD2]>;
875 def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
876 addrmode6align64or128>, Sched<[WriteVLD2]>;
877 defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u,
878 addrmode6align64or128>, Sched<[WriteVLD2]>;
879 defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u,
880 addrmode6align64or128>, Sched<[WriteVLD2]>;
881 defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u,
882 addrmode6align64or128>, Sched<[WriteVLD2]>;
884 // VLD3 : Vector Load (multiple 3-element structures)
// Three explicit DPR outs; op11_8 = 0b0100 is consecutive, 0b0101 is
// double-spaced (see "q" defs below).
885 class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
886 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
887 (ins addrmode6:$Rn), IIC_VLD3,
888 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> {
891 let DecoderMethod = "DecodeVLDST3Instruction";
894 def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
895 def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
896 def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;
898 def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
899 def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
900 def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
902 // ...with address register writeback:
// Writeback form takes am6offset:$Rm, printed as "$Rn$Rm" (covers both the
// '!' post-increment and register-offset syntaxes).
903 class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
904 : NLdSt<0, 0b10, op11_8, op7_4,
905 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
906 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
907 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
908 "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
910 let DecoderMethod = "DecodeVLDST3Instruction";
913 def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
914 def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
915 def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
917 def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
918 def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
919 def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
921 // ...with double-spaced registers:
922 def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">;
923 def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
924 def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
925 def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">;
926 def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
927 def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
929 def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
930 def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
931 def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
933 // ...alternate versions to be allocated odd register numbers:
934 def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
935 def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
936 def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
938 def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
939 def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
940 def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
942 // VLD4 : Vector Load (multiple 4-element structures)
// Four explicit DPR outs; op11_8 = 0b0000 consecutive, 0b0001 double-spaced.
// Unlike VLD3D, Inst{5-4} carries Rn's alignment bits here.
943 class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
944 : NLdSt<0, 0b10, op11_8, op7_4,
945 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
946 (ins addrmode6:$Rn), IIC_VLD4,
947 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>,
950 let Inst{5-4} = Rn{5-4};
951 let DecoderMethod = "DecodeVLDST4Instruction";
954 def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">;
955 def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
956 def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;
958 def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
959 def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
960 def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
962 // ...with address register writeback:
963 class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
964 : NLdSt<0, 0b10, op11_8, op7_4,
965 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
966 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
967 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
968 "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
969 let Inst{5-4} = Rn{5-4};
970 let DecoderMethod = "DecodeVLDST4Instruction";
973 def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">;
974 def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
975 def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
977 def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
978 def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
979 def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
981 // ...with double-spaced registers:
982 def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">;
983 def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
984 def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;
985 def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">;
986 def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
987 def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
989 def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
990 def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
991 def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
993 // ...alternate versions to be allocated odd register numbers:
994 def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
995 def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
996 def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
998 def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
999 def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1000 def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1002 } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1004 // Classes for VLD*LN pseudo-instructions with multi-register operands.
1005 // These are expanded to real instructions after register allocation.
// Each pairs an output super-register (QPR/QQPR/QQQQPR) with a tied $src of
// the same class plus a nohash_imm lane index; the WB variants add a GPR:$wb
// result tied to "$addr.addr".
1006 class VLDQLNPseudo<InstrItinClass itin>
1007 : PseudoNLdSt<(outs QPR:$dst),
1008 (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
1009 itin, "$src = $dst">;
1010 class VLDQLNWBPseudo<InstrItinClass itin>
1011 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
1012 (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
1013 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
1014 class VLDQQLNPseudo<InstrItinClass itin>
1015 : PseudoNLdSt<(outs QQPR:$dst),
1016 (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
1017 itin, "$src = $dst">;
1018 class VLDQQLNWBPseudo<InstrItinClass itin>
1019 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
1020 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
1021 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
1022 class VLDQQQQLNPseudo<InstrItinClass itin>
1023 : PseudoNLdSt<(outs QQQQPR:$dst),
1024 (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
1025 itin, "$src = $dst">;
1026 class VLDQQQQLNWBPseudo<InstrItinClass itin>
1027 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
1028 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
1029 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
1031 // VLD1LN : Vector Load (single element to one lane)
// Selects on vector_insert of a loaded i32 into a D-register lane; the
// loaded value replaces lane $lane, other lanes come from tied $src.
1032 class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
1034 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
1035 (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
1036 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
1038 [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
1039 (i32 (LoadOp addrmode6:$Rn)),
1042 let DecoderMethod = "DecodeVLD1LN";
// 32-bit variant uses addrmode6oneL32 (single 32-bit element addressing).
1044 class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
1046 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
1047 (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
1048 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
1050 [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
1051 (i32 (LoadOp addrmode6oneL32:$Rn)),
1052 imm:$lane))]>, Sched<[WriteVLD1]> {
1054 let DecoderMethod = "DecodeVLD1LN";
1056 class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln>,
1057 Sched<[WriteVLD1]> {
1058 let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
1059 (i32 (LoadOp addrmode6:$addr)),
// Lane index is packed into the high instruction bits; the number of bits
// used shrinks as the element widens (3 bits for i8, 2 for i16, 1 for i32).
1063 def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
1064 let Inst{7-5} = lane{2-0};
1066 def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
1067 let Inst{7-6} = lane{1-0};
1068 let Inst{5-4} = Rn{5-4};
1070 def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
1071 let Inst{7} = lane{0};
1072 let Inst{5-4} = Rn{5-4};
1075 def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>;
1076 def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
1077 def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
// f16/f32 lane loads reuse the integer lane-load instructions.
1079 let Predicates = [HasNEON] in {
1080 def : Pat<(vector_insert (v4f16 DPR:$src),
1081 (f16 (load addrmode6:$addr)), imm:$lane),
1082 (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
1083 def : Pat<(vector_insert (v8f16 QPR:$src),
1084 (f16 (load addrmode6:$addr)), imm:$lane),
1085 (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
1086 def : Pat<(vector_insert (v2f32 DPR:$src),
1087 (f32 (load addrmode6:$addr)), imm:$lane),
1088 (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
1089 def : Pat<(vector_insert (v4f32 QPR:$src),
1090 (f32 (load addrmode6:$addr)), imm:$lane),
1091 (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
1093 // A 64-bit subvector insert to the first 128-bit vector position
1094 // is a subregister copy that needs no instruction.
1095 def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)),
1096 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1097 def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)),
1098 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1099 def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)),
1100 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1101 def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)),
1102 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1103 def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)),
1104 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1105 def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)),
1106 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
// Lane loads below have no ISel patterns; mark load, side-effect-free, and
// require extra def-reg alloc handling.
1110 let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
1112 // ...with address register writeback:
1113 class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1114 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
1115 (ins addrmode6:$Rn, am6offset:$Rm,
1116 DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
1117 "\\{$Vd[$lane]\\}, $Rn$Rm",
1118 "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1119 let DecoderMethod = "DecodeVLD1LN";
1122 def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
1123 let Inst{7-5} = lane{2-0};
1125 def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
1126 let Inst{7-6} = lane{1-0};
1127 let Inst{4} = Rn{4};
1129 def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
1130 let Inst{7} = lane{0};
// NOTE(review): both Inst{5} and Inst{4} are sourced from Rn{4} here,
// whereas the non-writeback VLD1LNd32 uses Inst{5-4} = Rn{5-4} —
// confirm against the ISA encoding before touching either.
1131 let Inst{5} = Rn{4};
1132 let Inst{4} = Rn{4};
1135 def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
1136 def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
1137 def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
1139 // VLD2LN : Vector Load (single 2-element structure to one lane)
1140 class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1141 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
1142 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
1143 IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
1144 "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> {
1146 let Inst{4} = Rn{4};
1147 let DecoderMethod = "DecodeVLD2LN";
1150 def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> {
1151 let Inst{7-5} = lane{2-0};
1153 def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
1154 let Inst{7-6} = lane{1-0};
1156 def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
1157 let Inst{7} = lane{0};
1160 def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1161 def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1162 def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1164 // ...with double-spaced registers:
1165 def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
1166 let Inst{7-6} = lane{1-0};
1168 def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
1169 let Inst{7} = lane{0};
1172 def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1173 def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1175 // ...with address register writeback:
1176 class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1177 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
1178 (ins addrmode6:$Rn, am6offset:$Rm,
1179 DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
1180 "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
1181 "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
1182 let Inst{4} = Rn{4};
1183 let DecoderMethod = "DecodeVLD2LN";
1186 def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
1187 let Inst{7-5} = lane{2-0};
1189 def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
1190 let Inst{7-6} = lane{1-0};
1192 def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
1193 let Inst{7} = lane{0};
1196 def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1197 def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1198 def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1200 def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
1201 let Inst{7-6} = lane{1-0};
1203 def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
1204 let Inst{7} = lane{0};
1207 def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1208 def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1210 // VLD3LN : Vector Load (single 3-element structure to one lane)
// Three tied D-register sources/dests; lane index encoded in Inst{7-5}
// (fewer bits for wider elements, per the defs below).
1211 class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1212 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
1213 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
1214 nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
1215 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
1216 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> {
1218 let DecoderMethod = "DecodeVLD3LN";
1221 def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> {
1222 let Inst{7-5} = lane{2-0};
1224 def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
1225 let Inst{7-6} = lane{1-0};
1227 def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
1228 let Inst{7} = lane{0};
1231 def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1232 def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1233 def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1235 // ...with double-spaced registers:
1236 def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
1237 let Inst{7-6} = lane{1-0};
1239 def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
1240 let Inst{7} = lane{0};
1243 def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1244 def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1246 // ...with address register writeback:
1247 class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1248 : NLdStLn<1, 0b10, op11_8, op7_4,
1249 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
1250 (ins addrmode6:$Rn, am6offset:$Rm,
1251 DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
1252 IIC_VLD3lnu, "vld3", Dt,
1253 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
1254 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
1255 []>, Sched<[WriteVLD2]> {
1256 let DecoderMethod = "DecodeVLD3LN";
1259 def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
1260 let Inst{7-5} = lane{2-0};
1262 def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
1263 let Inst{7-6} = lane{1-0};
1265 def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
1266 let Inst{7} = lane{0};
1269 def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1270 def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1271 def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1273 def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
1274 let Inst{7-6} = lane{1-0};
1276 def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
1277 let Inst{7} = lane{0};
1280 def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1281 def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1283 // VLD4LN : Vector Load (single 4-element structure to one lane)
// As VLD3LN but with a fourth tied D register; 32-bit forms also route
// Rn{5} (alignment) into Inst{5}.
1284 class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1285 : NLdStLn<1, 0b10, op11_8, op7_4,
1286 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
1287 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
1288 nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
1289 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
1290 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>,
1291 Sched<[WriteVLD2]> {
1293 let Inst{4} = Rn{4};
1294 let DecoderMethod = "DecodeVLD4LN";
1297 def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> {
1298 let Inst{7-5} = lane{2-0};
1300 def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
1301 let Inst{7-6} = lane{1-0};
1303 def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
1304 let Inst{7} = lane{0};
1305 let Inst{5} = Rn{5};
1308 def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1309 def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1310 def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1312 // ...with double-spaced registers:
1313 def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
1314 let Inst{7-6} = lane{1-0};
1316 def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
1317 let Inst{7} = lane{0};
1318 let Inst{5} = Rn{5};
1321 def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1322 def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1324 // ...with address register writeback:
1325 class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1326 : NLdStLn<1, 0b10, op11_8, op7_4,
1327 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
1328 (ins addrmode6:$Rn, am6offset:$Rm,
1329 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
1330 IIC_VLD4lnu, "vld4", Dt,
1331 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
1332 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
1334 let Inst{4} = Rn{4};
1335 let DecoderMethod = "DecodeVLD4LN" ;
1338 def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
1339 let Inst{7-5} = lane{2-0};
1341 def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
1342 let Inst{7-6} = lane{1-0};
1344 def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
1345 let Inst{7} = lane{0};
1346 let Inst{5} = Rn{5};
1349 def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1350 def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1351 def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1353 def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
1354 let Inst{7-6} = lane{1-0};
1356 def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
1357 let Inst{7} = lane{0};
1358 let Inst{5} = Rn{5};
1361 def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1362 def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1364 } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1366 // VLD1DUP : Vector Load (single element to all lanes)
// Selects on ARMvdup of a loaded scalar: one element is loaded and
// broadcast to every lane of the destination list.
1367 class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
1369 : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
1371 IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
1372 [(set VecListOneDAllLanes:$Vd,
1373 (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
1374 Sched<[WriteVLD2]> {
1376 let Inst{4} = Rn{4};
1377 let DecoderMethod = "DecodeVLD1DupInstruction";
1379 def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8,
1380 addrmode6dupalignNone>;
1381 def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
1382 addrmode6dupalign16>;
1383 def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
1384 addrmode6dupalign32>;
1386 let Predicates = [HasNEON] in {
1387 def : Pat<(v2f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
1388 (VLD1DUPd32 addrmode6:$addr)>;
// Q-register (D-pair) broadcast variant of VLD1DUP.
1391 class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
1393 : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
1394 (ins AddrMode:$Rn), IIC_VLD1dup,
1395 "vld1", Dt, "$Vd, $Rn", "",
1396 [(set VecListDPairAllLanes:$Vd,
1397 (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
1399 let Inst{4} = Rn{4};
1400 let DecoderMethod = "DecodeVLD1DupInstruction";
1403 def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8,
1404 addrmode6dupalignNone>;
1405 def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
1406 addrmode6dupalign16>;
1407 def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
1408 addrmode6dupalign32>;
1410 let Predicates = [HasNEON] in {
1411 def : Pat<(v4f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
1412 (VLD1DUPq32 addrmode6:$addr)>;
1415 let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
1416 // ...with address register writeback:
1417 multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1418 def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
1419 (outs VecListOneDAllLanes:$Vd, GPR:$wb),
1420 (ins AddrMode:$Rn), IIC_VLD1dupu,
1421 "vld1", Dt, "$Vd, $Rn!",
1422 "$Rn.addr = $wb", []> {
1423 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1424 let Inst{4} = Rn{4};
1425 let DecoderMethod = "DecodeVLD1DupInstruction";
1427 def _register : NLdSt<1, 0b10, 0b1100, op7_4,
1428 (outs VecListOneDAllLanes:$Vd, GPR:$wb),
1429 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
1430 "vld1", Dt, "$Vd, $Rn, $Rm",
1431 "$Rn.addr = $wb", []> {
1432 let Inst{4} = Rn{4};
1433 let DecoderMethod = "DecodeVLD1DupInstruction";
1436 multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1437 def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
1438 (outs VecListDPairAllLanes:$Vd, GPR:$wb),
1439 (ins AddrMode:$Rn), IIC_VLD1dupu,
1440 "vld1", Dt, "$Vd, $Rn!",
1441 "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1442 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1443 let Inst{4} = Rn{4};
1444 let DecoderMethod = "DecodeVLD1DupInstruction";
1446 def _register : NLdSt<1, 0b10, 0b1100, op7_4,
1447 (outs VecListDPairAllLanes:$Vd, GPR:$wb),
1448 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
1449 "vld1", Dt, "$Vd, $Rn, $Rm",
1450 "$Rn.addr = $wb", []> {
1451 let Inst{4} = Rn{4};
1452 let DecoderMethod = "DecodeVLD1DupInstruction";
1456 defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
1457 defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
1458 defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;
1460 defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
1461 defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
1462 defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;
1464 // VLD2DUP : Vector Load (single 2-element structure to all lanes)
1465 class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
1466 : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
1467 (ins AddrMode:$Rn), IIC_VLD2dup,
1468 "vld2", Dt, "$Vd, $Rn", "", []> {
1470 let Inst{4} = Rn{4};
1471 let DecoderMethod = "DecodeVLD2DupInstruction";
1474 def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes,
1475 addrmode6dupalign16>;
1476 def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
1477 addrmode6dupalign32>;
1478 def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
1479 addrmode6dupalign64>;
1481 // HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or
1482 // "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
1483 // ...with double-spaced registers
1484 def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes,
1485 addrmode6dupalign16>;
1486 def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
1487 addrmode6dupalign32>;
1488 def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
1489 addrmode6dupalign64>;
1491 def VLD2DUPq8EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1492 def VLD2DUPq8OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1493 def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1494 def VLD2DUPq16OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1495 def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1496 def VLD2DUPq32OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1498 // ...with address register writeback:
1499 multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
1501 def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
1502 (outs VdTy:$Vd, GPR:$wb),
1503 (ins AddrMode:$Rn), IIC_VLD2dupu,
1504 "vld2", Dt, "$Vd, $Rn!",
1505 "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1506 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1507 let Inst{4} = Rn{4};
1508 let DecoderMethod = "DecodeVLD2DupInstruction";
1510 def _register : NLdSt<1, 0b10, 0b1101, op7_4,
1511 (outs VdTy:$Vd, GPR:$wb),
1512 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
1513 "vld2", Dt, "$Vd, $Rn, $Rm",
1514 "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1515 let Inst{4} = Rn{4};
1516 let DecoderMethod = "DecodeVLD2DupInstruction";
1520 defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes,
1521 addrmode6dupalign16>;
1522 defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
1523 addrmode6dupalign32>;
1524 defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
1525 addrmode6dupalign64>;
1527 defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes,
1528 addrmode6dupalign16>;
1529 defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
1530 addrmode6dupalign32>;
1531 defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
1532 addrmode6dupalign64>;
1534 // VLD3DUP : Vector Load (single 3-element structure to all lanes)
// Loads one 3-element structure from $Rn and replicates each element to all
// lanes of three D registers ($Vd, $dst2, $dst3).
1535 class VLD3DUP<bits<4> op7_4, string Dt>
1536 : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
1537 (ins addrmode6dup:$Rn), IIC_VLD3dup,
1538 "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
1539 Sched<[WriteVLD2]> {
1542 let DecoderMethod = "DecodeVLD3DupInstruction";
// Element-size instantiations; bits {7,6} of op7_4 encode the size.
1545 def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">;
1546 def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
1547 def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
// Pseudo-instruction placeholders for the three-D-register destination.
// NOTE(review): presumably expanded to the real instructions after register
// allocation, like the VST pseudos documented later in this file — confirm.
1549 def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1550 def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1551 def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1553 // ...with double-spaced registers (not used for codegen):
1554 def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">;
1555 def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
1556 def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;
// Even/Odd pseudos pin the register triple to even- or odd-numbered D regs.
1558 def VLD3DUPq8EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1559 def VLD3DUPq8OddPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1560 def VLD3DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1561 def VLD3DUPq16OddPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1562 def VLD3DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1563 def VLD3DUPq32OddPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1565 // ...with address register writeback:
// Same as VLD3DUP but also produces the updated base address in $wb;
// "$Rn.addr = $wb" ties the writeback result to the input base register.
1566 class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
1567 : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
1568 (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
1569 "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
1570 "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
1572 let DecoderMethod = "DecodeVLD3DupInstruction";
1575 def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>;
1576 def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
1577 def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;
// Double-spaced (q) writeback variants:
1579 def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>;
1580 def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
1581 def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;
1583 def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1584 def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1585 def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1587 // VLD4DUP : Vector Load (single 4-element structure to all lanes)
// Loads one 4-element structure and replicates each element across all lanes
// of four D registers.  Bit 4 of the encoding is the alignment bit from $Rn.
1588 class VLD4DUP<bits<4> op7_4, string Dt>
1589 : NLdSt<1, 0b10, 0b1111, op7_4,
1590 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
1591 (ins addrmode6dup:$Rn), IIC_VLD4dup,
1592 "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
1594 let Inst{4} = Rn{4};
1595 let DecoderMethod = "DecodeVLD4DupInstruction";
1598 def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">;
1599 def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
// 32-bit form additionally routes an address bit into Inst{6}.
1600 def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
1602 def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1603 def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1604 def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1606 // ...with double-spaced registers (not used for codegen):
1607 def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">;
1608 def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
1609 def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
// Even/Odd pseudos pin the register group to even- or odd-numbered D regs.
1611 def VLD4DUPq8EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1612 def VLD4DUPq8OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1613 def VLD4DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1614 def VLD4DUPq16OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1615 def VLD4DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1616 def VLD4DUPq32OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1618 // ...with address register writeback:
// VLD4DUP with base-register update; $wb carries the post-increment address.
1619 class VLD4DUPWB<bits<4> op7_4, string Dt>
1620 : NLdSt<1, 0b10, 0b1111, op7_4,
1621 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
1622 (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
1623 "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
1624 "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
1625 let Inst{4} = Rn{4};
1626 let DecoderMethod = "DecodeVLD4DupInstruction";
1629 def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">;
1630 def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
1631 def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
1633 def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
1634 def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
1635 def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
1637 def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1638 def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1639 def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1641 } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
// All vector stores below are pure stores with no other side effects and
// need special register-allocation treatment for their multi-reg sources.
1643 let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
1645 // Classes for VST* pseudo-instructions with multi-register operands.
1646 // These are expanded to real instructions after register allocation.
// Naming: Q / QQ / QQQQ is the width of the source super-register operand;
// "WB" variants additionally produce the updated base address in $wb
// ("$addr.addr = $wb").  "WBfixed" post-increments by the access size (no
// offset operand); "WBregister" takes an explicit GPR offset.
1647 class VSTQPseudo<InstrItinClass itin>
1648 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
1649 class VSTQWBPseudo<InstrItinClass itin>
1650 : PseudoNLdSt<(outs GPR:$wb),
1651 (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
1652 "$addr.addr = $wb">;
1653 class VSTQWBfixedPseudo<InstrItinClass itin>
1654 : PseudoNLdSt<(outs GPR:$wb),
1655 (ins addrmode6:$addr, QPR:$src), itin,
1656 "$addr.addr = $wb">;
1657 class VSTQWBregisterPseudo<InstrItinClass itin>
1658 : PseudoNLdSt<(outs GPR:$wb),
1659 (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
1660 "$addr.addr = $wb">;
1661 class VSTQQPseudo<InstrItinClass itin>
1662 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
1663 class VSTQQWBPseudo<InstrItinClass itin>
1664 : PseudoNLdSt<(outs GPR:$wb),
1665 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
1666 "$addr.addr = $wb">;
1667 class VSTQQWBfixedPseudo<InstrItinClass itin>
1668 : PseudoNLdSt<(outs GPR:$wb),
1669 (ins addrmode6:$addr, QQPR:$src), itin,
1670 "$addr.addr = $wb">;
1671 class VSTQQWBregisterPseudo<InstrItinClass itin>
1672 : PseudoNLdSt<(outs GPR:$wb),
1673 (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
1674 "$addr.addr = $wb">;
1676 class VSTQQQQPseudo<InstrItinClass itin>
1677 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
1678 class VSTQQQQWBPseudo<InstrItinClass itin>
1679 : PseudoNLdSt<(outs GPR:$wb),
1680 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
1681 "$addr.addr = $wb">;
1683 // VST1 : Vector Store (multiple single elements)
// One D register ($Vd) stored to [$Rn].  Inst{4} (or {5-4} for the wider
// forms) carries the alignment bits from the address-mode operand.
1684 class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
1685 : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
1686 IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> {
1688 let Inst{4} = Rn{4};
1689 let DecoderMethod = "DecodeVLDST1Instruction";
// Two-register (D-pair) form of VST1.
1691 class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
1692 : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
1693 IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> {
1695 let Inst{5-4} = Rn{5-4};
1696 let DecoderMethod = "DecodeVLDST1Instruction";
1699 def VST1d8 : VST1D<{0,0,0,?}, "8", addrmode6align64>;
1700 def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>;
1701 def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>;
1702 def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>;
1704 def VST1q8 : VST1Q<{0,0,?,?}, "8", addrmode6align64or128>;
1705 def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
1706 def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
1707 def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;
1709 // ...with address register writeback:
// _fixed: post-increment by the access size (Rm hard-wired to 0b1101);
// _register: post-increment by an explicit GPR $Rm.
// NOTE(review): these stores use a VLD itinerary (IIC_VLD1u/IIC_VLD1x2u);
// looks copied from the load classes — confirm intended.
1710 multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1711 def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
1712 (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
1713 "vst1", Dt, "$Vd, $Rn!",
1714 "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
1715 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1716 let Inst{4} = Rn{4};
1717 let DecoderMethod = "DecodeVLDST1Instruction";
1719 def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
1720 (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
1722 "vst1", Dt, "$Vd, $Rn, $Rm",
1723 "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
1724 let Inst{4} = Rn{4};
1725 let DecoderMethod = "DecodeVLDST1Instruction";
// Writeback variants of the D-pair form.
1728 multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1729 def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
1730 (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
1731 "vst1", Dt, "$Vd, $Rn!",
1732 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1733 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1734 let Inst{5-4} = Rn{5-4};
1735 let DecoderMethod = "DecodeVLDST1Instruction";
1737 def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
1738 (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
1740 "vst1", Dt, "$Vd, $Rn, $Rm",
1741 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1742 let Inst{5-4} = Rn{5-4};
1743 let DecoderMethod = "DecodeVLDST1Instruction";
1747 defm VST1d8wb : VST1DWB<{0,0,0,?}, "8", addrmode6align64>;
1748 defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
1749 defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
1750 defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;
1752 defm VST1q8wb : VST1QWB<{0,0,?,?}, "8", addrmode6align64or128>;
1753 defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
1754 defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
1755 defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
1757 // ...with 3 registers
// VST1 with a three-D-register list ($Vd..$Vd+2).
1758 class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
1759 : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
1760 (ins AddrMode:$Rn, VecListThreeD:$Vd),
1761 IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> {
1763 let Inst{4} = Rn{4};
1764 let DecoderMethod = "DecodeVLDST1Instruction";
// Writeback variants (_fixed: Rm=0b1101 post-increment; _register: GPR $Rm).
1766 multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
1767 def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
1768 (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
1769 "vst1", Dt, "$Vd, $Rn!",
1770 "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
1771 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1772 let Inst{5-4} = Rn{5-4};
1773 let DecoderMethod = "DecodeVLDST1Instruction";
1775 def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
1776 (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
1778 "vst1", Dt, "$Vd, $Rn, $Rm",
1779 "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
1780 let Inst{5-4} = Rn{5-4};
1781 let DecoderMethod = "DecodeVLDST1Instruction";
1785 def VST1d8T : VST1D3<{0,0,0,?}, "8", addrmode6align64>;
1786 def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
1787 def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
1788 def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>;
1790 defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8", addrmode6align64>;
1791 defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
1792 defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
1793 defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;
// Pseudos taking the triple as a QQ source until post-RA expansion.
1795 def VST1d8TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1796 def VST1d16TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1797 def VST1d32TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1798 def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1799 def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
1800 def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
1802 def VST1q8HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1803 def VST1q8LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1804 def VST1q16HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1805 def VST1q16LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1806 def VST1q32HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1807 def VST1q32LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1808 def VST1q64HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1809 def VST1q64LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1811 // ...with 4 registers
// VST1 with a four-D-register list ($Vd..$Vd+3).
1812 class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
1813 : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
1814 (ins AddrMode:$Rn, VecListFourD:$Vd),
1815 IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
1816 []>, Sched<[WriteVST4]> {
1818 let Inst{5-4} = Rn{5-4};
1819 let DecoderMethod = "DecodeVLDST1Instruction";
// Writeback variants (_fixed: Rm=0b1101 post-increment; _register: GPR $Rm).
1821 multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
1822 def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
1823 (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
1824 "vst1", Dt, "$Vd, $Rn!",
1825 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1826 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1827 let Inst{5-4} = Rn{5-4};
1828 let DecoderMethod = "DecodeVLDST1Instruction";
1830 def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
1831 (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
1833 "vst1", Dt, "$Vd, $Rn, $Rm",
1834 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1835 let Inst{5-4} = Rn{5-4};
1836 let DecoderMethod = "DecodeVLDST1Instruction";
1840 def VST1d8Q : VST1D4<{0,0,?,?}, "8", addrmode6align64or128or256>;
1841 def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
1842 def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
1843 def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;
1845 defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>;
1846 defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
1847 defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
1848 defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
// Pseudos carrying the four D regs as a QQ source until post-RA expansion.
1850 def VST1d8QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1851 def VST1d16QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1852 def VST1d32QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1853 def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1854 def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
1855 def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
1857 def VST1q8HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1858 def VST1q8LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1859 def VST1q16HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1860 def VST1q16LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1861 def VST1q32HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1862 def VST1q32LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1863 def VST1q64HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1864 def VST1q64LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1866 // VST2 : Vector Store (multiple 2-element structures)
// Interleaved 2-element store; op11_8 selects the form (0b1000 = d-pair,
// 0b0011 = four-D/q, 0b1001 = double-spaced pair, see uses below).
1867 class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
1868 InstrItinClass itin, Operand AddrMode>
1869 : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd),
1870 itin, "vst2", Dt, "$Vd, $Rn", "", []> {
1872 let Inst{5-4} = Rn{5-4};
1873 let DecoderMethod = "DecodeVLDST2Instruction";
1876 def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2,
1877 addrmode6align64or128>, Sched<[WriteVST2]>;
1878 def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
1879 addrmode6align64or128>, Sched<[WriteVST2]>;
1880 def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
1881 addrmode6align64or128>, Sched<[WriteVST2]>;
1883 def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2,
1884 addrmode6align64or128or256>, Sched<[WriteVST4]>;
1885 def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
1886 addrmode6align64or128or256>, Sched<[WriteVST4]>;
1887 def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
1888 addrmode6align64or128or256>, Sched<[WriteVST4]>;
1890 def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
1891 def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
1892 def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
1894 // ...with address register writeback:
// _fixed post-increments by the access size (Rm hard-wired to 0b1101);
// _register post-increments by an explicit GPR $Rm.
// NOTE(review): both use the load itinerary IIC_VLD1u for a store — looks
// copied from the VLD classes; confirm intended.
1895 multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
1896 RegisterOperand VdTy, Operand AddrMode> {
1897 def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1898 (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
1899 "vst2", Dt, "$Vd, $Rn!",
1900 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1901 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1902 let Inst{5-4} = Rn{5-4};
1903 let DecoderMethod = "DecodeVLDST2Instruction";
1905 def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1906 (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
1907 "vst2", Dt, "$Vd, $Rn, $Rm",
1908 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1909 let Inst{5-4} = Rn{5-4};
1910 let DecoderMethod = "DecodeVLDST2Instruction";
// Four-register (q) writeback form; opcode fixed at 0b0011.
1913 multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1914 def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
1915 (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
1916 "vst2", Dt, "$Vd, $Rn!",
1917 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1918 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1919 let Inst{5-4} = Rn{5-4};
1920 let DecoderMethod = "DecodeVLDST2Instruction";
1922 def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
1923 (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
1925 "vst2", Dt, "$Vd, $Rn, $Rm",
1926 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1927 let Inst{5-4} = Rn{5-4};
1928 let DecoderMethod = "DecodeVLDST2Instruction";
1932 defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair,
1933 addrmode6align64or128>;
1934 defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
1935 addrmode6align64or128>;
1936 defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
1937 addrmode6align64or128>;
1939 defm VST2q8wb : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>;
1940 defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
1941 defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;
1943 def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1944 def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1945 def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1946 def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1947 def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1948 def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1950 // ...with double-spaced registers
1951 def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2,
1952 addrmode6align64or128>;
1953 def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
1954 addrmode6align64or128>;
1955 def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
1956 addrmode6align64or128>;
1957 defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced,
1958 addrmode6align64or128>;
1959 defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
1960 addrmode6align64or128>;
1961 defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
1962 addrmode6align64or128>;
1964 // VST3 : Vector Store (multiple 3-element structures)
// Interleaved 3-element store of three explicit D registers.
1965 class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
1966 : NLdSt<0, 0b00, op11_8, op7_4, (outs),
1967 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
1968 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> {
1970 let Inst{4} = Rn{4};
1971 let DecoderMethod = "DecodeVLDST3Instruction";
1974 def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">;
1975 def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
1976 def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;
1978 def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
1979 def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
1980 def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
1982 // ...with address register writeback:
// Writeback form; the updated base address comes back in $wb.
1983 class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1984 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1985 (ins addrmode6:$Rn, am6offset:$Rm,
1986 DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
1987 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
1988 "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
1989 let Inst{4} = Rn{4};
1990 let DecoderMethod = "DecodeVLDST3Instruction";
1993 def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">;
1994 def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
1995 def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
1997 def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
1998 def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
1999 def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2001 // ...with double-spaced registers:
// Same classes, op11_8 = 0b0101 selects the double-spaced (q) encoding.
2002 def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">;
2003 def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
2004 def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
2005 def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">;
2006 def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
2007 def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
2009 def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2010 def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2011 def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2013 // ...alternate versions to be allocated odd register numbers:
2014 def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
2015 def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
2016 def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
2018 def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2019 def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2020 def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2022 // VST4 : Vector Store (multiple 4-element structures)
// Interleaved 4-element store of four explicit D registers.
2023 class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
2024 : NLdSt<0, 0b00, op11_8, op7_4, (outs),
2025 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
2026 IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
2027 "", []>, Sched<[WriteVST4]> {
2029 let Inst{5-4} = Rn{5-4};
2030 let DecoderMethod = "DecodeVLDST4Instruction";
2033 def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">;
2034 def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
2035 def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;
2037 def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2038 def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2039 def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2041 // ...with address register writeback:
// Writeback form; the updated base address comes back in $wb.
2042 class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2043 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
2044 (ins addrmode6:$Rn, am6offset:$Rm,
2045 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
2046 "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
2047 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
2048 let Inst{5-4} = Rn{5-4};
2049 let DecoderMethod = "DecodeVLDST4Instruction";
2052 def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">;
2053 def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
2054 def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
2056 def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2057 def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2058 def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2060 // ...with double-spaced registers:
// Same classes, op11_8 = 0b0001 selects the double-spaced (q) encoding.
2061 def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">;
2062 def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
2063 def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
2064 def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">;
2065 def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
2066 def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
2068 def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2069 def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2070 def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2072 // ...alternate versions to be allocated odd register numbers:
2073 def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2074 def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2075 def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2077 def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2078 def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2079 def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2081 } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
2083 // Classes for VST*LN pseudo-instructions with multi-register operands.
2084 // These are expanded to real instructions after register allocation.
// Naming mirrors the VST* pseudos above: Q / QQ / QQQQ is the width of the
// $src super-register; WB variants also return the updated address in $wb.
// All take the lane index as a plain (no-'#') immediate operand.
2085 class VSTQLNPseudo<InstrItinClass itin>
2086 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
2088 class VSTQLNWBPseudo<InstrItinClass itin>
2089 : PseudoNLdSt<(outs GPR:$wb),
2090 (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
2091 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2092 class VSTQQLNPseudo<InstrItinClass itin>
2093 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
2095 class VSTQQLNWBPseudo<InstrItinClass itin>
2096 : PseudoNLdSt<(outs GPR:$wb),
2097 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
2098 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2099 class VSTQQQQLNPseudo<InstrItinClass itin>
2100 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
2102 class VSTQQQQLNWBPseudo<InstrItinClass itin>
2103 : PseudoNLdSt<(outs GPR:$wb),
2104 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
2105 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2107 // VST1LN : Vector Store (single element from one lane)
// Stores lane $lane of D register $Vd via StoreOp after extracting it with
// ExtractOp; the selection pattern is built right into the class.
2108 class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
2109 PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
2110 : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2111 (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
2112 IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
2113 [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>,
2114 Sched<[WriteVST1]> {
2116 let DecoderMethod = "DecodeVST1LN";
// Q-register source version, selected as a pseudo and expanded later.
2118 class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
2119 : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> {
2120 let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
// Per-size defs; the lane index occupies the top bits of op7_4's field
// (Inst{7-5}/{7-6}/{7}), with remaining bits taken from the address operand.
2124 def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
2125 ARMvgetlaneu, addrmode6> {
2126 let Inst{7-5} = lane{2-0};
2128 def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
2129 ARMvgetlaneu, addrmode6> {
2130 let Inst{7-6} = lane{1-0};
2131 let Inst{4} = Rn{4};
2134 def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
2136 let Inst{7} = lane{0};
2137 let Inst{5-4} = Rn{5-4};
2140 def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, ARMvgetlaneu>;
2141 def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, ARMvgetlaneu>;
2142 def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
// Extra patterns mapping float lane stores onto the integer instructions.
2144 let Predicates = [HasNEON] in {
2145 def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
2146 (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
2147 def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
2148 (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
2150 def : Pat<(store (extractelt (v4f16 DPR:$src), imm:$lane), addrmode6:$addr),
2151 (VST1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
2152 def : Pat<(store (extractelt (v8f16 QPR:$src), imm:$lane), addrmode6:$addr),
2153 (VST1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
2156 // ...with address register writeback:
// Post-indexed lane store; the pattern uses the post_* store fragments and
// the updated base address is produced in $wb.
2157 class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
2158 PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
2159 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2160 (ins AdrMode:$Rn, am6offset:$Rm,
2161 DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
2162 "\\{$Vd[$lane]\\}, $Rn$Rm",
2164 [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
2165 AdrMode:$Rn, am6offset:$Rm))]>,
2166 Sched<[WriteVST1]> {
2167 let DecoderMethod = "DecodeVST1LN";
// Q-register source version of the writeback lane store.
2169 class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
2170 : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> {
2171 let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
2172 addrmode6:$addr, am6offset:$offset))];
2175 def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
2176 ARMvgetlaneu, addrmode6> {
2177 let Inst{7-5} = lane{2-0};
2179 def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
2180 ARMvgetlaneu, addrmode6> {
2181 let Inst{7-6} = lane{1-0};
2182 let Inst{4} = Rn{4};
2184 def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
2185 extractelt, addrmode6oneL32> {
2186 let Inst{7} = lane{0};
2187 let Inst{5-4} = Rn{5-4};
2190 def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, ARMvgetlaneu>;
2191 def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,ARMvgetlaneu>;
2192 def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
// The remaining multi-register lane stores share the store-only flags.
2194 let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
2196 // VST2LN : Vector Store (single 2-element structure from one lane)
// Stores lane $lane from each of two D registers as one 2-element structure.
2197 class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2198 : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2199 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
2200 IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
2201 "", []>, Sched<[WriteVST1]> {
2203 let Inst{4} = Rn{4};
2204 let DecoderMethod = "DecodeVST2LN";
// Per-size defs; lane index goes into Inst{7-5}/{7-6}/{7} by element size.
2207 def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
2208 let Inst{7-5} = lane{2-0};
2210 def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
2211 let Inst{7-6} = lane{1-0};
2213 def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
2214 let Inst{7} = lane{0};
2217 def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2218 def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2219 def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2221 // ...with double-spaced registers:
2222 def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
2223 let Inst{7-6} = lane{1-0};
2224 let Inst{4} = Rn{4};
2226 def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
2227 let Inst{7} = lane{0};
2228 let Inst{4} = Rn{4};
2231 def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2232 def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2234 // ...with address register writeback:
2235 class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2236 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2237 (ins addrmode6:$Rn, am6offset:$Rm,
2238 DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
2239 "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
2240 "$Rn.addr = $wb", []> {
2241 let Inst{4} = Rn{4};
2242 let DecoderMethod = "DecodeVST2LN";
2245 def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
2246 let Inst{7-5} = lane{2-0};
2248 def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
2249 let Inst{7-6} = lane{1-0};
2251 def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
2252 let Inst{7} = lane{0};
2255 def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2256 def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2257 def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2259 def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
2260 let Inst{7-6} = lane{1-0};
2262 def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
2263 let Inst{7} = lane{0};
2266 def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2267 def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2269 // VST3LN : Vector Store (single 3-element structure from one lane)
// Base class: stores one lane from each of three D registers to the
// address in $Rn. No writeback; decoding is handled by DecodeVST3LN.
2270 class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2271 : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2272 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
2273 nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
2274 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
2275 Sched<[WriteVST2]> {
2277 let DecoderMethod = "DecodeVST3LN";
// Element-size variants; encoded lane bits narrow as element size grows.
2280 def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
2281 let Inst{7-5} = lane{2-0};
2283 def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
2284 let Inst{7-6} = lane{1-0};
2286 def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
2287 let Inst{7} = lane{0};
2290 def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2291 def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2292 def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2294 // ...with double-spaced registers:
2295 def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
2296 let Inst{7-6} = lane{1-0};
2298 def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
2299 let Inst{7} = lane{0};
// QQQQ pseudos for the double-spaced forms (span four D registers).
2302 def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
2303 def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
2305 // ...with address register writeback:
// Writeback base class: produces the updated base address in $wb.
2306 class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2307 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2308 (ins addrmode6:$Rn, am6offset:$Rm,
2309 DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
2310 IIC_VST3lnu, "vst3", Dt,
2311 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
2312 "$Rn.addr = $wb", []> {
2313 let DecoderMethod = "DecodeVST3LN";
2316 def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
2317 let Inst{7-5} = lane{2-0};
2319 def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
2320 let Inst{7-6} = lane{1-0};
2322 def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
2323 let Inst{7} = lane{0};
2326 def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2327 def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2328 def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2330 def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
2331 let Inst{7-6} = lane{1-0};
2333 def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
2334 let Inst{7} = lane{0};
2337 def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2338 def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2340 // VST4LN : Vector Store (single 4-element structure from one lane)
// Base class: stores one lane from each of four D registers to $Rn.
// Inst{4} carries bit 4 of the addrmode6 operand (index/alignment field).
2341 class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2342 : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2343 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
2344 nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
2345 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
2346 "", []>, Sched<[WriteVST2]> {
2348 let Inst{4} = Rn{4};
2349 let DecoderMethod = "DecodeVST4LN";
// Element-size variants; encoded lane bits narrow as element size grows.
2352 def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
2353 let Inst{7-5} = lane{2-0};
2355 def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
2356 let Inst{7-6} = lane{1-0};
// 32-bit form additionally routes Rn bit 5 into Inst{5} (part of the
// index_align encoding for 32-bit elements -- see DecodeVST4LN).
2358 def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
2359 let Inst{7} = lane{0};
2360 let Inst{5} = Rn{5};
2363 def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2364 def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2365 def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2367 // ...with double-spaced registers:
2368 def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
2369 let Inst{7-6} = lane{1-0};
2371 def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
2372 let Inst{7} = lane{0};
2373 let Inst{5} = Rn{5};
2376 def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2377 def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2379 // ...with address register writeback:
// Writeback base class: produces the updated base address in $wb.
2380 class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2381 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2382 (ins addrmode6:$Rn, am6offset:$Rm,
2383 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
2384 IIC_VST4lnu, "vst4", Dt,
2385 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
2386 "$Rn.addr = $wb", []> {
2387 let Inst{4} = Rn{4};
2388 let DecoderMethod = "DecodeVST4LN";
2391 def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
2392 let Inst{7-5} = lane{2-0};
2394 def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
2395 let Inst{7-6} = lane{1-0};
2397 def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
2398 let Inst{7} = lane{0};
2399 let Inst{5} = Rn{5};
2402 def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2403 def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2404 def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2406 def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
2407 let Inst{7-6} = lane{1-0};
2409 def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
2410 let Inst{7} = lane{0};
2411 let Inst{5} = Rn{5};
2414 def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2415 def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2417 } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
2419 // Use vld1/vst1 for unaligned f64 load / store
// Little-endian only: an under-aligned f64 can be accessed as a vector of
// smaller elements (16-bit for halfword alignment, 8-bit for byte
// alignment) without changing the in-register value.
2420 let Predicates = [IsLE,HasNEON] in {
2421 def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
2422 (VLD1d16 addrmode6:$addr)>;
2423 def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
2424 (VST1d16 addrmode6:$addr, DPR:$value)>;
2425 def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
2426 (VLD1d8 addrmode6:$addr)>;
2427 def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
2428 (VST1d8 addrmode6:$addr, DPR:$value)>;
// Big-endian: use the 64-bit-element form for any non-word-aligned f64
// access (smaller element sizes would reorder bytes on BE).
2430 let Predicates = [IsBE,HasNEON] in {
2431 def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
2432 (VLD1d64 addrmode6:$addr)>;
2433 def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
2434 (VST1d64 addrmode6:$addr, DPR:$value)>;
2437 // Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
2438 // load / store if it's legal.
// Doubleword-aligned v2f64 accesses map to the 64-bit-element q form on
// either endianness.
2439 let Predicates = [HasNEON] in {
2440 def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
2441 (VLD1q64 addrmode6:$addr)>;
2442 def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2443 (VST1q64 addrmode6:$addr, QPR:$value)>;
// LE only: progressively smaller element sizes for progressively weaker
// alignment (word -> q32, halfword -> q16, byte -> q8).
2445 let Predicates = [IsLE,HasNEON] in {
2446 def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
2447 (VLD1q32 addrmode6:$addr)>;
2448 def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2449 (VST1q32 addrmode6:$addr, QPR:$value)>;
2450 def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
2451 (VLD1q16 addrmode6:$addr)>;
2452 def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2453 (VST1q16 addrmode6:$addr, QPR:$value)>;
2454 def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
2455 (VLD1q8 addrmode6:$addr)>;
2456 def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2457 (VST1q8 addrmode6:$addr, QPR:$value)>;
2460 //===----------------------------------------------------------------------===//
2461 // NEON pattern fragments
2462 //===----------------------------------------------------------------------===//
2464 // Extract D sub-registers of Q registers.
// Each XForm maps an element index into the dsub_N subregister index that
// contains it; the divisor is the number of elements per D register
// (8 x i8, 4 x i16, 2 x i32, 1 x f64).
2465 def DSubReg_i8_reg : SDNodeXForm<imm, [{
2466 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2467 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, SDLoc(N),
2470 def DSubReg_i16_reg : SDNodeXForm<imm, [{
2471 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2472 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, SDLoc(N),
2475 def DSubReg_i32_reg : SDNodeXForm<imm, [{
2476 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2477 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, SDLoc(N),
2480 def DSubReg_f64_reg : SDNodeXForm<imm, [{
2481 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2482 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), SDLoc(N),
2486 // Extract S sub-registers of Q/D registers.
// f32 lane index maps 1:1 onto ssub_N.
2487 def SSubReg_f32_reg : SDNodeXForm<imm, [{
2488 assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
2489 return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), SDLoc(N),
2493 // Extract S sub-registers of Q/D registers containing a given f16 lane.
// Two f16 lanes per S register, hence the /2.
2494 def SSubReg_f16_reg : SDNodeXForm<imm, [{
2495 assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
2496 return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue()/2, SDLoc(N),
2500 // Translate lane numbers from Q registers to D subregs.
// Masks the Q-register lane index down to the index within one D subreg
// (8 i8 / 4 i16 / 2 i32 lanes per D register).
2501 def SubReg_i8_lane : SDNodeXForm<imm, [{
2502 return CurDAG->getTargetConstant(N->getZExtValue() & 7, SDLoc(N), MVT::i32);
2504 def SubReg_i16_lane : SDNodeXForm<imm, [{
2505 return CurDAG->getTargetConstant(N->getZExtValue() & 3, SDLoc(N), MVT::i32);
2507 def SubReg_i32_lane : SDNodeXForm<imm, [{
2508 return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i32);
2511 //===----------------------------------------------------------------------===//
2512 // Instruction Classes
2513 //===----------------------------------------------------------------------===//
2515 // Basic 2-register operations: double- and quad-register.
// Single-source unary op, D-register form: $Vd = OpNode($Vm).
2516 class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2517 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
2518 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
2519 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
2520 (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
2521 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
// Q-register form of the same (op6 = 1 selects the quad encoding).
2522 class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2523 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
2524 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
2525 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
2526 (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
2527 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
2529 // Basic 2-register intrinsics, both double- and quad-register.
// Same shape as N2VD/N2VQ but matches an intrinsic (SDPatternOperator)
// and takes an explicit itinerary.
2530 class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2531 bits<2> op17_16, bits<5> op11_7, bit op4,
2532 InstrItinClass itin, string OpcodeStr, string Dt,
2533 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2534 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
2535 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2536 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
2537 class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2538 bits<2> op17_16, bits<5> op11_7, bit op4,
2539 InstrItinClass itin, string OpcodeStr, string Dt,
2540 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2541 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
2542 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2543 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2545 // Same as above, but not predicated.
// Built on N2Vnp instead of N2V: no condition-code operand.
2546 class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
2547 InstrItinClass itin, string OpcodeStr, string Dt,
2548 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2549 : N2Vnp<op19_18, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
2550 itin, OpcodeStr, Dt,
2551 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
2553 class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
2554 InstrItinClass itin, string OpcodeStr, string Dt,
2555 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2556 : N2Vnp<op19_18, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
2557 itin, OpcodeStr, Dt,
2558 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2560 // Similar to NV2VQIntnp with some more encoding bits exposed (crypto).
2561 class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
2562 bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
2563 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2564 : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm),
2565 itin, OpcodeStr, Dt,
2566 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2568 // Same as N2VQIntXnp but with Vd as a src register.
// The destination doubles as an accumulator input ($src = $Vd tie).
2569 class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
2570 bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
2571 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2572 : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
2573 (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
2574 itin, OpcodeStr, Dt,
2575 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
2576 let Constraints = "$src = $Vd";
2579 // Narrow 2-register operations.
// Q source, D destination (e.g. VMOVN-style narrowing).
2580 class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2581 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2582 InstrItinClass itin, string OpcodeStr, string Dt,
2583 ValueType TyD, ValueType TyQ, SDNode OpNode>
2584 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
2585 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2586 [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;
2588 // Narrow 2-register intrinsics.
2589 class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2590 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2591 InstrItinClass itin, string OpcodeStr, string Dt,
2592 ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
2593 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
2594 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2595 [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
2597 // Long 2-register operations (currently only used for VMOVL).
// D source, Q destination (widening).
2598 class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2599 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2600 InstrItinClass itin, string OpcodeStr, string Dt,
2601 ValueType TyQ, ValueType TyD, SDNode OpNode>
2602 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
2603 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2604 [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;
2606 // Long 2-register intrinsics.
2607 class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2608 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2609 InstrItinClass itin, string OpcodeStr, string Dt,
2610 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
2611 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
2612 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2613 [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
2615 // 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both registers are read AND written: each output is tied to the
// corresponding input ("$src1 = $Vd, $src2 = $Vm"). No pattern; these are
// selected manually.
2616 class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
2617 : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
2618 (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
2619 OpcodeStr, Dt, "$Vd, $Vm",
2620 "$src1 = $Vd, $src2 = $Vm", []>;
2621 class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
2622 InstrItinClass itin, string OpcodeStr, string Dt>
2623 : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
2624 (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
2625 "$src1 = $Vd, $src2 = $Vm", []>;
2627 // Basic 3-register operations: double- and quad-register.
// $Vd = OpNode($Vn, $Vm), D-register form. Commutable is forwarded to
// isCommutable so target-independent passes may swap operands.
2628 class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2629 InstrItinClass itin, string OpcodeStr, string Dt,
2630 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2631 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2632 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2633 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2634 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
2635 // All of these have a two-operand InstAlias.
2636 let TwoOperandAliasConstraint = "$Vn = $Vd";
2637 let isCommutable = Commutable;
2639 // Same as N3VD but no data type.
2640 class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2641 InstrItinClass itin, string OpcodeStr,
2642 ValueType ResTy, ValueType OpTy,
2643 SDNode OpNode, bit Commutable>
2644 : N3VX<op24, op23, op21_20, op11_8, 0, op4,
2645 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2646 OpcodeStr, "$Vd, $Vn, $Vm", "",
2647 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
2648 // All of these have a two-operand InstAlias.
2649 let TwoOperandAliasConstraint = "$Vn = $Vd";
2650 let isCommutable = Commutable;
// Scalar ("by lane") form: second operand is one lane of a D register,
// duplicated via ARMvduplane. Restricted register classes (DPR_VFP2 /
// DPR_8) limit which D registers the scalar may live in.
2653 class N3VDSL<bits<2> op21_20, bits<4> op11_8,
2654 InstrItinClass itin, string OpcodeStr, string Dt,
2655 ValueType Ty, SDNode ShOp>
2656 : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2657 (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2658 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2660 (Ty (ShOp (Ty DPR:$Vn),
2661 (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
2662 // All of these have a two-operand InstAlias.
2663 let TwoOperandAliasConstraint = "$Vn = $Vd";
2664 let isCommutable = 0;
2666 class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
2667 string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
2668 : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2669 (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2670 NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
2672 (Ty (ShOp (Ty DPR:$Vn),
2673 (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
2674 // All of these have a two-operand InstAlias.
2675 let TwoOperandAliasConstraint = "$Vn = $Vd";
2676 let isCommutable = 0;
// Q-register counterparts of the classes above.
2679 class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2680 InstrItinClass itin, string OpcodeStr, string Dt,
2681 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2682 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2683 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2684 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2685 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
2686 // All of these have a two-operand InstAlias.
2687 let TwoOperandAliasConstraint = "$Vn = $Vd";
2688 let isCommutable = Commutable;
2690 class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2691 InstrItinClass itin, string OpcodeStr,
2692 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2693 : N3VX<op24, op23, op21_20, op11_8, 1, op4,
2694 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2695 OpcodeStr, "$Vd, $Vn, $Vm", "",
2696 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
2697 // All of these have a two-operand InstAlias.
2698 let TwoOperandAliasConstraint = "$Vn = $Vd";
2699 let isCommutable = Commutable;
// Q-register scalar forms: the scalar still comes from a D register.
2701 class N3VQSL<bits<2> op21_20, bits<4> op11_8,
2702 InstrItinClass itin, string OpcodeStr, string Dt,
2703 ValueType ResTy, ValueType OpTy, SDNode ShOp>
2704 : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2705 (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2706 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2707 [(set (ResTy QPR:$Vd),
2708 (ResTy (ShOp (ResTy QPR:$Vn),
2709 (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
2711 // All of these have a two-operand InstAlias.
2712 let TwoOperandAliasConstraint = "$Vn = $Vd";
2713 let isCommutable = 0;
2715 class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
2716 ValueType ResTy, ValueType OpTy, SDNode ShOp>
2717 : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2718 (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2719 NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
2720 [(set (ResTy QPR:$Vd),
2721 (ResTy (ShOp (ResTy QPR:$Vn),
2722 (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
2724 // All of these have a two-operand InstAlias.
2725 let TwoOperandAliasConstraint = "$Vn = $Vd";
2726 let isCommutable = 0;
2729 // Basic 3-register intrinsics, both double- and quad-register.
// Like N3VD but the pattern matches an intrinsic and the instruction
// Format is a parameter.
2730 class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2731 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2732 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
2733 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2734 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
2735 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2736 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
2737 // All of these have a two-operand InstAlias.
2738 let TwoOperandAliasConstraint = "$Vn = $Vd";
2739 let isCommutable = Commutable;
// Non-predicated variant (built on N3Vnp).
2742 class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
2743 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
2744 string Dt, ValueType ResTy, ValueType OpTy,
2745 SDPatternOperator IntOp, bit Commutable>
2746 : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
2747 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
2748 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
// Scalar ("by lane") intrinsic forms.
2750 class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2751 string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
2752 : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2753 (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2754 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2756 (Ty (IntOp (Ty DPR:$Vn),
2757 (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
2759 let isCommutable = 0;
2762 class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2763 string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
2764 : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2765 (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2766 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2768 (Ty (IntOp (Ty DPR:$Vn),
2769 (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
2770 let isCommutable = 0;
// "Sh" variant: operand order is swapped ($Vm before $Vn) in both the
// ins list and the asm string, and the two-operand alias ties $Vm.
2772 class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2773 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2774 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2775 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2776 (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
2777 OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
2778 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
2779 let TwoOperandAliasConstraint = "$Vm = $Vd";
2780 let isCommutable = 0;
// Q-register counterparts of the classes above.
2783 class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2784 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2785 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
2786 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2787 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
2788 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2789 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
2790 // All of these have a two-operand InstAlias.
2791 let TwoOperandAliasConstraint = "$Vn = $Vd";
2792 let isCommutable = Commutable;
2795 class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
2796 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
2797 string Dt, ValueType ResTy, ValueType OpTy,
2798 SDPatternOperator IntOp, bit Commutable>
2799 : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
2800 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
2801 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
2803 // Same as N3VQIntnp but with Vd as a src register.
// Accumulating form: $src is tied to the destination.
2804 class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
2805 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
2806 string Dt, ValueType ResTy, ValueType OpTy,
2807 SDPatternOperator IntOp, bit Commutable>
2808 : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
2809 (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
2810 f, itin, OpcodeStr, Dt,
2811 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
2812 (OpTy QPR:$Vm))))]> {
2813 let Constraints = "$src = $Vd";
2816 class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2817 string OpcodeStr, string Dt,
2818 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2819 : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2820 (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2821 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2822 [(set (ResTy QPR:$Vd),
2823 (ResTy (IntOp (ResTy QPR:$Vn),
2824 (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
2826 let isCommutable = 0;
2828 class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2829 string OpcodeStr, string Dt,
2830 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2831 : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2832 (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2833 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2834 [(set (ResTy QPR:$Vd),
2835 (ResTy (IntOp (ResTy QPR:$Vn),
2836 (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
2838 let isCommutable = 0;
2840 class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2841 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2842 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2843 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2844 (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
2845 OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
2846 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
2847 let TwoOperandAliasConstraint = "$Vm = $Vd";
2848 let isCommutable = 0;
2851 // Multiply-Add/Sub operations: double- and quad-register.
// $Vd = OpNode($src1, MulOp($Vn, $Vm)) with the accumulator tied to the
// destination ("$src1 = $Vd").
2852 class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2853 InstrItinClass itin, string OpcodeStr, string Dt,
2854 ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
2855 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2856 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2857 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2858 [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
2859 (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
// Scalar ("by lane") multiply-accumulate variants.
2861 class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2862 string OpcodeStr, string Dt,
2863 ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
2864 : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2866 (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2868 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2870 (Ty (ShOp (Ty DPR:$src1),
2872 (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
2874 class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2875 string OpcodeStr, string Dt,
2876 ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
2877 : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2879 (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2881 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2883 (Ty (ShOp (Ty DPR:$src1),
2885 (Ty (ARMvduplane (Ty DPR_8:$Vm),
// Q-register counterparts.
2888 class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2889 InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
2890 SDPatternOperator MulOp, SDPatternOperator OpNode>
2891 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2892 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2893 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2894 [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
2895 (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
2896 class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2897 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
2898 SDPatternOperator MulOp, SDPatternOperator ShOp>
2899 : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2901 (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2903 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2904 [(set (ResTy QPR:$Vd),
2905 (ResTy (ShOp (ResTy QPR:$src1),
2906 (ResTy (MulOp QPR:$Vn,
2907 (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
2909 class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2910 string OpcodeStr, string Dt,
2911 ValueType ResTy, ValueType OpTy,
2912 SDPatternOperator MulOp, SDPatternOperator ShOp>
2913 : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2915 (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2917 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2918 [(set (ResTy QPR:$Vd),
2919 (ResTy (ShOp (ResTy QPR:$src1),
2920 (ResTy (MulOp QPR:$Vn,
2921 (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
2924 // Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// $Vd = OpNode($src1, IntOp($Vn, $Vm)) -- an intrinsic result combined
// with the tied accumulator by a plain SDNode.
2925 class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2926 InstrItinClass itin, string OpcodeStr, string Dt,
2927 ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
2928 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2929 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2930 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2931 [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
2932 (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
2933 class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2934 InstrItinClass itin, string OpcodeStr, string Dt,
2935 ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
2936 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2937 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2938 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2939 [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
2940 (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
2942 // Neon 3-argument intrinsics, both double- and quad-register.
2943 // The destination register is also used as the first source operand register.
// Here the intrinsic itself takes all three operands (accumulator first).
2944 class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2945 InstrItinClass itin, string OpcodeStr, string Dt,
2946 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2947 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2948 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2949 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2950 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
2951 (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
2952 class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2953 InstrItinClass itin, string OpcodeStr, string Dt,
2954 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2955 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2956 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2957 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2958 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
2959 (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
2961 // Long Multiply-Add/Sub operations.
2962 class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2963 InstrItinClass itin, string OpcodeStr, string Dt,
2964 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2965 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2966 (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2967 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2968 [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
2969 (TyQ (MulOp (TyD DPR:$Vn),
2970 (TyD DPR:$Vm)))))]>;
2971 class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
2972 InstrItinClass itin, string OpcodeStr, string Dt,
2973 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2974 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
2975 (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2977 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2979 (OpNode (TyQ QPR:$src1),
2980 (TyQ (MulOp (TyD DPR:$Vn),
2981 (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),
2983 class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
2984 InstrItinClass itin, string OpcodeStr, string Dt,
2985 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2986 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
2987 (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2989 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2991 (OpNode (TyQ QPR:$src1),
2992 (TyQ (MulOp (TyD DPR:$Vn),
2993 (TyD (ARMvduplane (TyD DPR_8:$Vm),
2996 // Long Intrinsic-Op vector operations with explicit extend (VABAL).
2997 class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2998 InstrItinClass itin, string OpcodeStr, string Dt,
2999 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
3001 : N3V<op24, op23, op21_20, op11_8, 0, op4,
3002 (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3003 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
3004 [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
3005 (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
3006 (TyD DPR:$Vm)))))))]>;
3008 // Neon Long 3-argument intrinsic. The destination register is
3009 // a quad-register and is also used as the first source operand register.
// Qd = IntOp(Qd, Dn, Dm); the intrinsic itself performs the accumulation.
3010 class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3011 InstrItinClass itin, string OpcodeStr, string Dt,
3012 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
3013 : N3V<op24, op23, op21_20, op11_8, 0, op4,
3014 (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3015 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
3017 (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
// Scalar (lane) form of N3VLInt3 for 32-bit elements:
// Qd = IntOp(Qd, Dn, dup(Dm[lane])).
3018 class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
3019 string OpcodeStr, string Dt,
3020 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3021 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
3023 (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
3025 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
3026 [(set (ResTy QPR:$Vd),
3027 (ResTy (IntOp (ResTy QPR:$src1),
3029 (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
// Scalar (lane) form of N3VLInt3 for 16-bit elements (DPR_8 scalar).
3031 class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
3032 InstrItinClass itin, string OpcodeStr, string Dt,
3033 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3034 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
3036 (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
3038 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
3039 [(set (ResTy QPR:$Vd),
3040 (ResTy (IntOp (ResTy QPR:$src1),
3042 (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
3045 // Narrowing 3-register intrinsics.
// Dd = IntOp(Qn, Qm): two quad-register sources narrowed to a double result.
3046 class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3047 string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
3048 SDPatternOperator IntOp, bit Commutable>
3049 : N3V<op24, op23, op21_20, op11_8, 0, op4,
3050 (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
3051 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3052 [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
3053 let isCommutable = Commutable;
3056 // Long 3-register operations.
// Qd = OpNode(Dn, Dm): two double-register sources, quad-register result.
3057 class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3058 InstrItinClass itin, string OpcodeStr, string Dt,
3059 ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
3060 : N3V<op24, op23, op21_20, op11_8, 0, op4,
3061 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3062 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3063 [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
3064 let isCommutable = Commutable;
// Scalar (lane) form, 32-bit elements: Qd = OpNode(Dn, dup(Dm[lane])).
3067 class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
3068 InstrItinClass itin, string OpcodeStr, string Dt,
3069 ValueType TyQ, ValueType TyD, SDNode OpNode>
3070 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
3071 (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
3072 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3074 (TyQ (OpNode (TyD DPR:$Vn),
3075 (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
// Scalar (lane) form, 16-bit elements (DPR_8 scalar, VectorIndex16).
3076 class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
3077 InstrItinClass itin, string OpcodeStr, string Dt,
3078 ValueType TyQ, ValueType TyD, SDNode OpNode>
3079 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
3080 (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
3081 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3083 (TyQ (OpNode (TyD DPR:$Vn),
3084 (TyD (ARMvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
3086 // Long 3-register operations with explicitly extended operands.
// Qd = OpNode(ExtOp(Dn), ExtOp(Dm)): both operands widened before OpNode.
3087 class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3088 InstrItinClass itin, string OpcodeStr, string Dt,
3089 ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
3091 : N3V<op24, op23, op21_20, op11_8, 0, op4,
3092 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3093 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3094 [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
3095 (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
3096 let isCommutable = Commutable;
3099 // Long 3-register intrinsics with explicit extend (VABDL).
// Qd = ExtOp(IntOp(Dn, Dm)): D-sized intrinsic result widened afterwards.
3100 class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3101 InstrItinClass itin, string OpcodeStr, string Dt,
3102 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
3104 : N3V<op24, op23, op21_20, op11_8, 0, op4,
3105 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3106 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3107 [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
3108 (TyD DPR:$Vm))))))]> {
3109 let isCommutable = Commutable;
3112 // Long 3-register intrinsics.
// Qd = IntOp(Dn, Dm): widening intrinsic, quad result from double sources.
3113 class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3114 InstrItinClass itin, string OpcodeStr, string Dt,
3115 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
3116 : N3V<op24, op23, op21_20, op11_8, 0, op4,
3117 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3118 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3119 [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
3120 let isCommutable = Commutable;
3123 // Same as above, but not predicated.
// Uses the N3Vnp (no-predicate) encoding base instead of N3V.
3124 class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
3125 bit op4, InstrItinClass itin, string OpcodeStr,
3126 string Dt, ValueType ResTy, ValueType OpTy,
3127 SDPatternOperator IntOp, bit Commutable>
3128 : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
3129 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
3130 [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
// Scalar (lane) form, 32-bit elements: Qd = IntOp(Dn, dup(Dm[lane])).
3132 class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
3133 string OpcodeStr, string Dt,
3134 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3135 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
3136 (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
3137 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3138 [(set (ResTy QPR:$Vd),
3139 (ResTy (IntOp (OpTy DPR:$Vn),
3140 (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
// Scalar (lane) form, 16-bit elements (DPR_8 scalar, VectorIndex16).
3142 class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
3143 InstrItinClass itin, string OpcodeStr, string Dt,
3144 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3145 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
3146 (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
3147 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3148 [(set (ResTy QPR:$Vd),
3149 (ResTy (IntOp (OpTy DPR:$Vn),
3150 (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
3153 // Wide 3-register operations.
// Qd = OpNode(Qn, ExtOp(Dm)): only the second operand is widened.
3154 class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3155 string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
3156 SDNode OpNode, SDNode ExtOp, bit Commutable>
3157 : N3V<op24, op23, op21_20, op11_8, 0, op4,
3158 (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
3159 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3160 [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
3161 (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
3162 // All of these have a two-operand InstAlias.
3163 let TwoOperandAliasConstraint = "$Vn = $Vd";
3164 let isCommutable = Commutable;
3167 // Pairwise long 2-register intrinsics, both double- and quad-register.
// Double-register form: Dd = IntOp(Dm).
3168 class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3169 bits<2> op17_16, bits<5> op11_7, bit op4,
3170 string OpcodeStr, string Dt,
3171 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3172 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
3173 (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
3174 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
// Quad-register form: Qd = IntOp(Qm). Differs only in the Q bit and regs.
3175 class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3176 bits<2> op17_16, bits<5> op11_7, bit op4,
3177 string OpcodeStr, string Dt,
3178 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3179 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
3180 (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
3181 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
3183 // Pairwise long 2-register accumulate intrinsics,
3184 // both double- and quad-register.
3185 // The destination register is also used as the first source operand register.
// Double-register form: Dd = IntOp(Dd, Dm); $src1 is tied to $Vd.
3186 class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3187 bits<2> op17_16, bits<5> op11_7, bit op4,
3188 string OpcodeStr, string Dt,
3189 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3190 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
3191 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
3192 OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
3193 [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
// Quad-register form: Qd = IntOp(Qd, Qm).
3194 class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3195 bits<2> op17_16, bits<5> op11_7, bit op4,
3196 string OpcodeStr, string Dt,
3197 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3198 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
3199 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
3200 OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
3201 [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
3203 // Shift by immediate,
3204 // both double- and quad-register.
// Both classes get the two-operand alias "op Vd, #imm" == "op Vd, Vd, #imm".
3205 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Double-register form: Dd = OpNode(Dm, #SIMM).
3206 class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3207 Format f, InstrItinClass itin, Operand ImmTy,
3208 string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
3209 : N2VImm<op24, op23, op11_8, op7, 0, op4,
3210 (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
3211 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3212 [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
// Quad-register form: Qd = OpNode(Qm, #SIMM).
3213 class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3214 Format f, InstrItinClass itin, Operand ImmTy,
3215 string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
3216 : N2VImm<op24, op23, op11_8, op7, 1, op4,
3217 (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
3218 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3219 [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
3222 // Long shift by immediate.
// Qd = OpNode(Dm, #SIMM): double-register source, quad-register result.
3223 class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
3224 string OpcodeStr, string Dt,
3225 ValueType ResTy, ValueType OpTy, Operand ImmTy,
3226 SDPatternOperator OpNode>
3227 : N2VImm<op24, op23, op11_8, op7, op6, op4,
3228 (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
3229 IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3230 [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;
3232 // Narrow shift by immediate.
// Dd = OpNode(Qm, #SIMM): quad-register source, double-register result.
3233 class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
3234 InstrItinClass itin, string OpcodeStr, string Dt,
3235 ValueType ResTy, ValueType OpTy, Operand ImmTy,
3236 SDPatternOperator OpNode>
3237 : N2VImm<op24, op23, op11_8, op7, op6, op4,
3238 (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
3239 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3240 [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
3241 (i32 ImmTy:$SIMM))))]>;
3243 // Shift right by immediate and accumulate,
3244 // both double- and quad-register.
3245 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Double-register form (VSRA-style): Dd = src1 + ShOp(Vm, #SIMM);
// the accumulator $src1 is tied to $Vd.
3246 class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3247 Operand ImmTy, string OpcodeStr, string Dt,
3248 ValueType Ty, SDNode ShOp>
3249 : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
3250 (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
3251 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3252 [(set DPR:$Vd, (Ty (add DPR:$src1,
3253 (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
// Quad-register form: Qd = src1 + ShOp(Vm, #SIMM).
3254 class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3255 Operand ImmTy, string OpcodeStr, string Dt,
3256 ValueType Ty, SDNode ShOp>
3257 : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
3258 (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
3259 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3260 [(set QPR:$Vd, (Ty (add QPR:$src1,
3261 (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
3264 // Shift by immediate and insert,
3265 // both double- and quad-register.
3266 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Double-register form (VSLI/VSRI-style): Dd = ShOp(src1, Vm, #SIMM);
// ShOp takes the tied destination as an extra operand for the insert.
3267 class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3268 Operand ImmTy, Format f, string OpcodeStr, string Dt,
3269 ValueType Ty,SDNode ShOp>
3270 : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
3271 (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
3272 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3273 [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
// Quad-register form: Qd = ShOp(src1, Vm, #SIMM).
3274 class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3275 Operand ImmTy, Format f, string OpcodeStr, string Dt,
3276 ValueType Ty,SDNode ShOp>
3277 : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
3278 (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
3279 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3280 [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
3283 // Convert, with fractional bits immediate,
3284 // both double- and quad-register.
// Double-register form: Dd = IntOp(Dm, #fbits).
3285 class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3286 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
3287 SDPatternOperator IntOp>
3288 : N2VImm<op24, op23, op11_8, op7, 0, op4,
3289 (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
3290 IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3291 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
// Quad-register form: Qd = IntOp(Qm, #fbits).
3292 class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3293 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
3294 SDPatternOperator IntOp>
3295 : N2VImm<op24, op23, op11_8, op7, 1, op4,
3296 (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
3297 IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3298 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
3300 //===----------------------------------------------------------------------===//
3302 //===----------------------------------------------------------------------===//
3304 // Abbreviations used in multiclass suffixes:
3305 // Q = quarter int (8 bit) elements
3306 // H = half int (16 bit) elements
3307 // S = single int (32 bit) elements
3308 // D = double int (64 bit) elements
3310 // Neon 2-register vector operations and intrinsics.
3312 // Neon 2-register comparisons.
3313 // source operand element sizes of 8, 16 and 32 bits:
// Instantiates one comparison-with-zero instruction per element type:
// i8/i16/i32 plus f32 (and f16 when FullFP16 is available), in both
// 64-bit (D) and 128-bit (Q) widths. FP variants set Inst{10} (the F bit).
3314 multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3315 bits<5> op11_7, bit op4, string opc, string Dt,
3316 string asm, SDNode OpNode> {
3317 // 64-bit vector types.
3318 def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
3319 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3320 opc, !strconcat(Dt, "8"), asm, "",
3321 [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
3322 def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
3323 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3324 opc, !strconcat(Dt, "16"), asm, "",
3325 [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
3326 def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
3327 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3328 opc, !strconcat(Dt, "32"), asm, "",
3329 [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
// f32 comparison produces an integer mask (v2i32 result from v2f32 input).
3330 def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
3331 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3332 opc, "f32", asm, "",
3333 [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
3334 let Inst{10} = 1; // overwrite F = 1
3336 def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
3337 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3338 opc, "f16", asm, "",
3339 [(set DPR:$Vd, (v4i16 (OpNode (v4f16 DPR:$Vm))))]>,
3340 Requires<[HasNEON,HasFullFP16]> {
3341 let Inst{10} = 1; // overwrite F = 1
3344 // 128-bit vector types.
3345 def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
3346 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3347 opc, !strconcat(Dt, "8"), asm, "",
3348 [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
3349 def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
3350 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3351 opc, !strconcat(Dt, "16"), asm, "",
3352 [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
3353 def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
3354 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3355 opc, !strconcat(Dt, "32"), asm, "",
3356 [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
3357 def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
3358 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3359 opc, "f32", asm, "",
3360 [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
3361 let Inst{10} = 1; // overwrite F = 1
3363 def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
3364 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3365 opc, "f16", asm, "",
3366 [(set QPR:$Vd, (v8i16 (OpNode (v8f16 QPR:$Vm))))]>,
3367 Requires<[HasNEON,HasFullFP16]> {
3368 let Inst{10} = 1; // overwrite F = 1
3373 // Neon 2-register vector intrinsics,
3374 // element sizes of 8, 16 and 32 bits:
// Instantiates D (N2VDInt) and Q (N2VQInt) variants for each element size;
// the size is encoded in bits op19_18 (0b00/0b01/0b10) and in the Dt suffix.
3375 multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3376 bits<5> op11_7, bit op4,
3377 InstrItinClass itinD, InstrItinClass itinQ,
3378 string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3379 // 64-bit vector types.
3380 def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3381 itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
3382 def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3383 itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
3384 def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3385 itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
3387 // 128-bit vector types.
3388 def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3389 itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
3390 def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3391 itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
3392 def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3393 itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
3397 // Neon Narrowing 2-register vector operations,
3398 // source operand element sizes of 16, 32 and 64 bits:
// Each def halves the element size (Dt suffix names the SOURCE size).
// NOTE(review): the signature tail (original line 3402, declaring OpNode)
// is missing from this extract.
3399 multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3400 bits<5> op11_7, bit op6, bit op4,
3401 InstrItinClass itin, string OpcodeStr, string Dt,
3403 def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
3404 itin, OpcodeStr, !strconcat(Dt, "16"),
3405 v8i8, v8i16, OpNode>;
3406 def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
3407 itin, OpcodeStr, !strconcat(Dt, "32"),
3408 v4i16, v4i32, OpNode>;
3409 def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
3410 itin, OpcodeStr, !strconcat(Dt, "64"),
3411 v2i32, v2i64, OpNode>;
3414 // Neon Narrowing 2-register vector intrinsics,
3415 // source operand element sizes of 16, 32 and 64 bits:
// Intrinsic counterpart of N2VN_HSD; the Dt suffix names the SOURCE size.
3416 multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3417 bits<5> op11_7, bit op6, bit op4,
3418 InstrItinClass itin, string OpcodeStr, string Dt,
3419 SDPatternOperator IntOp> {
3420 def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
3421 itin, OpcodeStr, !strconcat(Dt, "16"),
3422 v8i8, v8i16, IntOp>;
3423 def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
3424 itin, OpcodeStr, !strconcat(Dt, "32"),
3425 v4i16, v4i32, IntOp>;
3426 def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
3427 itin, OpcodeStr, !strconcat(Dt, "64"),
3428 v2i32, v2i64, IntOp>;
3432 // Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
3433 // source operand element sizes of 16, 32 and 64 bits:
// Each def doubles the element size; Dt suffix names the SOURCE size.
3434 multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
3435 string OpcodeStr, string Dt, SDNode OpNode> {
3436 def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
3437 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
3438 def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
3439 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
3440 def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
3441 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
3445 // Neon 3-register vector operations.
3447 // First with only element sizes of 8, 16 and 32 bits:
// Instantiates D (N3VD) and Q (N3VQ) variants for i8/i16/i32; separate
// itineraries for 16-bit-and-under vs 32-bit element widths.
3448 multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3449 InstrItinClass itinD16, InstrItinClass itinD32,
3450 InstrItinClass itinQ16, InstrItinClass itinQ32,
3451 string OpcodeStr, string Dt,
3452 SDNode OpNode, bit Commutable = 0> {
3453 // 64-bit vector types.
3454 def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
3455 OpcodeStr, !strconcat(Dt, "8"),
3456 v8i8, v8i8, OpNode, Commutable>;
3457 def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
3458 OpcodeStr, !strconcat(Dt, "16"),
3459 v4i16, v4i16, OpNode, Commutable>;
3460 def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
3461 OpcodeStr, !strconcat(Dt, "32"),
3462 v2i32, v2i32, OpNode, Commutable>;
3464 // 128-bit vector types.
3465 def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
3466 OpcodeStr, !strconcat(Dt, "8"),
3467 v16i8, v16i8, OpNode, Commutable>;
3468 def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
3469 OpcodeStr, !strconcat(Dt, "16"),
3470 v8i16, v8i16, OpNode, Commutable>;
3471 def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
3472 OpcodeStr, !strconcat(Dt, "32"),
3473 v4i32, v4i32, OpNode, Commutable>;
// Scalar (lane) variants for 16- and 32-bit elements, D and Q widths.
3476 multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
3477 def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
3478 def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
3479 def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
3480 def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
3481 v4i32, v2i32, ShOp>;
3484 // ....then also with element size 64 bits:
// Extends N3V_QHS with v1i64/v2i64 variants (size bits 0b11).
3485 multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3486 InstrItinClass itinD, InstrItinClass itinQ,
3487 string OpcodeStr, string Dt,
3488 SDNode OpNode, bit Commutable = 0>
3489 : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
3490 OpcodeStr, Dt, OpNode, Commutable> {
3491 def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
3492 OpcodeStr, !strconcat(Dt, "64"),
3493 v1i64, v1i64, OpNode, Commutable>;
3494 def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
3495 OpcodeStr, !strconcat(Dt, "64"),
3496 v2i64, v2i64, OpNode, Commutable>;
3500 // Neon 3-register vector intrinsics.
3502 // First with only element sizes of 16 and 32 bits:
// Intrinsic counterpart of N3V_QHS limited to i16/i32, D and Q widths.
3503 multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3504 InstrItinClass itinD16, InstrItinClass itinD32,
3505 InstrItinClass itinQ16, InstrItinClass itinQ32,
3506 string OpcodeStr, string Dt,
3507 SDPatternOperator IntOp, bit Commutable = 0> {
3508 // 64-bit vector types.
3509 def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
3510 OpcodeStr, !strconcat(Dt, "16"),
3511 v4i16, v4i16, IntOp, Commutable>;
3512 def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
3513 OpcodeStr, !strconcat(Dt, "32"),
3514 v2i32, v2i32, IntOp, Commutable>;
3516 // 128-bit vector types.
3517 def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
3518 OpcodeStr, !strconcat(Dt, "16"),
3519 v8i16, v8i16, IntOp, Commutable>;
3520 def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
3521 OpcodeStr, !strconcat(Dt, "32"),
3522 v4i32, v4i32, IntOp, Commutable>;
// Same shape as N3VInt_HS but built on the *Sh ("shift") instruction
// classes (N3VDIntSh/N3VQIntSh); no Commutable parameter.
3524 multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3525 InstrItinClass itinD16, InstrItinClass itinD32,
3526 InstrItinClass itinQ16, InstrItinClass itinQ32,
3527 string OpcodeStr, string Dt,
3528 SDPatternOperator IntOp> {
3529 // 64-bit vector types.
3530 def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
3531 OpcodeStr, !strconcat(Dt, "16"),
3532 v4i16, v4i16, IntOp>;
3533 def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
3534 OpcodeStr, !strconcat(Dt, "32"),
3535 v2i32, v2i32, IntOp>;
3537 // 128-bit vector types.
3538 def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
3539 OpcodeStr, !strconcat(Dt, "16"),
3540 v8i16, v8i16, IntOp>;
3541 def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
3542 OpcodeStr, !strconcat(Dt, "32"),
3543 v4i32, v4i32, IntOp>;
// Scalar (lane) variants of the 3-register intrinsics for i16/i32,
// D and Q widths.
3546 multiclass N3VIntSL_HS<bits<4> op11_8,
3547 InstrItinClass itinD16, InstrItinClass itinD32,
3548 InstrItinClass itinQ16, InstrItinClass itinQ32,
3549 string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3550 def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
3551 OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
3552 def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
3553 OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
3554 def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
3555 OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
3556 def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
3557 OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
3560 // ....then also with element size of 8 bits:
// Extends N3VInt_HS with v8i8/v16i8 variants (size bits 0b00).
3561 multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3562 InstrItinClass itinD16, InstrItinClass itinD32,
3563 InstrItinClass itinQ16, InstrItinClass itinQ32,
3564 string OpcodeStr, string Dt,
3565 SDPatternOperator IntOp, bit Commutable = 0>
3566 : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3567 OpcodeStr, Dt, IntOp, Commutable> {
3568 def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
3569 OpcodeStr, !strconcat(Dt, "8"),
3570 v8i8, v8i8, IntOp, Commutable>;
3571 def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
3572 OpcodeStr, !strconcat(Dt, "8"),
3573 v16i8, v16i8, IntOp, Commutable>;
// Same extension for the *Sh variants (N3VInt_HSSh plus i8 defs).
// NOTE(review): the type-argument line of the v8i8 def (original line
// 3584) is missing from this extract.
3575 multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3576 InstrItinClass itinD16, InstrItinClass itinD32,
3577 InstrItinClass itinQ16, InstrItinClass itinQ32,
3578 string OpcodeStr, string Dt,
3579 SDPatternOperator IntOp>
3580 : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3581 OpcodeStr, Dt, IntOp> {
3582 def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
3583 OpcodeStr, !strconcat(Dt, "8"),
3585 def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
3586 OpcodeStr, !strconcat(Dt, "8"),
3587 v16i8, v16i8, IntOp>;
3591 // ....then also with element size of 64 bits:
// Extends N3VInt_QHS with v1i64/v2i64 variants (size bits 0b11).
3592 multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3593 InstrItinClass itinD16, InstrItinClass itinD32,
3594 InstrItinClass itinQ16, InstrItinClass itinQ32,
3595 string OpcodeStr, string Dt,
3596 SDPatternOperator IntOp, bit Commutable = 0>
3597 : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3598 OpcodeStr, Dt, IntOp, Commutable> {
3599 def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
3600 OpcodeStr, !strconcat(Dt, "64"),
3601 v1i64, v1i64, IntOp, Commutable>;
3602 def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
3603 OpcodeStr, !strconcat(Dt, "64"),
3604 v2i64, v2i64, IntOp, Commutable>;
// Same 64-bit extension for the *Sh variants.
3606 multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3607 InstrItinClass itinD16, InstrItinClass itinD32,
3608 InstrItinClass itinQ16, InstrItinClass itinQ32,
3609 string OpcodeStr, string Dt,
3610 SDPatternOperator IntOp>
3611 : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3612 OpcodeStr, Dt, IntOp> {
3613 def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
3614 OpcodeStr, !strconcat(Dt, "64"),
3615 v1i64, v1i64, IntOp>;
3616 def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
3617 OpcodeStr, !strconcat(Dt, "64"),
3618 v2i64, v2i64, IntOp>;
3621 // Neon Narrowing 3-register vector intrinsics,
3622 // source operand element sizes of 16, 32 and 64 bits:
// Each def halves the element size (Dt suffix names the SOURCE size).
3623 multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3624 string OpcodeStr, string Dt,
3625 SDPatternOperator IntOp, bit Commutable = 0> {
3626 def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4,
3627 OpcodeStr, !strconcat(Dt, "16"),
3628 v8i8, v8i16, IntOp, Commutable>;
3629 def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
3630 OpcodeStr, !strconcat(Dt, "32"),
3631 v4i16, v4i32, IntOp, Commutable>;
3632 def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
3633 OpcodeStr, !strconcat(Dt, "64"),
3634 v2i32, v2i64, IntOp, Commutable>;
3638 // Neon Long 3-register vector operations.
// Each def doubles the element size; Dt suffix names the SOURCE size.
3640 multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3641 InstrItinClass itin16, InstrItinClass itin32,
3642 string OpcodeStr, string Dt,
3643 SDNode OpNode, bit Commutable = 0> {
3644 def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
3645 OpcodeStr, !strconcat(Dt, "8"),
3646 v8i16, v8i8, OpNode, Commutable>;
3647 def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
3648 OpcodeStr, !strconcat(Dt, "16"),
3649 v4i32, v4i16, OpNode, Commutable>;
3650 def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
3651 OpcodeStr, !strconcat(Dt, "32"),
3652 v2i64, v2i32, OpNode, Commutable>;
// Scalar (lane) variants; lanes only exist for 16/32-bit source elements.
// NOTE(review): the signature tail declaring OpNode (original line 3657)
// is missing from this extract.
3655 multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
3656 InstrItinClass itin, string OpcodeStr, string Dt,
3658 def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
3659 !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
3660 def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
3661 !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
// Long 3-register ops with explicitly extended operands (see N3VLExt),
// for i8/i16/i32 source elements.
3664 multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3665 InstrItinClass itin16, InstrItinClass itin32,
3666 string OpcodeStr, string Dt,
3667 SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
3668 def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
3669 OpcodeStr, !strconcat(Dt, "8"),
3670 v8i16, v8i8, OpNode, ExtOp, Commutable>;
3671 def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
3672 OpcodeStr, !strconcat(Dt, "16"),
3673 v4i32, v4i16, OpNode, ExtOp, Commutable>;
3674 def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
3675 OpcodeStr, !strconcat(Dt, "32"),
3676 v2i64, v2i32, OpNode, ExtOp, Commutable>;
3679 // Neon Long 3-register vector intrinsics.
3681 // First with only element sizes of 16 and 32 bits:
3682 multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
3683 InstrItinClass itin16, InstrItinClass itin32,
3684 string OpcodeStr, string Dt,
3685 SDPatternOperator IntOp, bit Commutable = 0> {
3686 def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
3687 OpcodeStr, !strconcat(Dt, "16"),
3688 v4i32, v4i16, IntOp, Commutable>;
3689 def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
3690 OpcodeStr, !strconcat(Dt, "32"),
3691 v2i64, v2i32, IntOp, Commutable>;
// Scalar (lane) variants of the long intrinsics, 16/32-bit source elements.
3694 multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
3695 InstrItinClass itin, string OpcodeStr, string Dt,
3696 SDPatternOperator IntOp> {
3697 def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
3698 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
3699 def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
3700 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
3703 // ....then also with element size of 8 bits:
// Extends N3VLInt_HS with the v8i16-from-v8i8 variant (size bits 0b00).
3704 multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3705 InstrItinClass itin16, InstrItinClass itin32,
3706 string OpcodeStr, string Dt,
3707 SDPatternOperator IntOp, bit Commutable = 0>
3708 : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
3709 IntOp, Commutable> {
3710 def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
3711 OpcodeStr, !strconcat(Dt, "8"),
3712 v8i16, v8i8, IntOp, Commutable>;
3715 // ....with explicit extend (VABDL).
// Instantiates N3VLIntExt (Qd = ExtOp(IntOp(Dn, Dm))) for i8/i16/i32.
3716 multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3717 InstrItinClass itin, string OpcodeStr, string Dt,
3718 SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> {
3719 def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
3720 OpcodeStr, !strconcat(Dt, "8"),
3721 v8i16, v8i8, IntOp, ExtOp, Commutable>;
3722 def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
3723 OpcodeStr, !strconcat(Dt, "16"),
3724 v4i32, v4i16, IntOp, ExtOp, Commutable>;
3725 def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
3726 OpcodeStr, !strconcat(Dt, "32"),
3727 v2i64, v2i32, IntOp, ExtOp, Commutable>;
3731 // Neon Wide 3-register vector intrinsics,
3732 // source operand element sizes of 8, 16 and 32 bits:
// (Q = Q op D: the narrower second operand is widened via ExtOp; used
// e.g. by VADDW below.)
3733 multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3734 string OpcodeStr, string Dt,
3735 SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
3736 def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
3737 OpcodeStr, !strconcat(Dt, "8"),
3738 v8i16, v8i8, OpNode, ExtOp, Commutable>;
3739 def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
3740 OpcodeStr, !strconcat(Dt, "16"),
3741 v4i32, v4i16, OpNode, ExtOp, Commutable>;
3742 def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
3743 OpcodeStr, !strconcat(Dt, "32"),
3744 v2i64, v2i32, OpNode, ExtOp, Commutable>;
3748 // Neon Multiply-Op vector operations,
3749 // element sizes of 8, 16 and 32 bits:
// (each def combines a 'mul' with OpNode — e.g. accumulate/subtract)
3750 multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3751 InstrItinClass itinD16, InstrItinClass itinD32,
3752 InstrItinClass itinQ16, InstrItinClass itinQ32,
3753 string OpcodeStr, string Dt, SDNode OpNode> {
3754 // 64-bit vector types.
3755 def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
3756 OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
3757 def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
3758 OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
3759 def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
3760 OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;
3762 // 128-bit vector types.
3763 def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
3764 OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
3765 def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
3766 OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
3767 def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
3768 OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
// ....with lane ("SL") scalar variants; the Q-register forms pair the Q
// result type with the D lane type (e.g. v8i16 result, v4i16 lane vector):
3771 multiclass N3VMulOpSL_HS<bits<4> op11_8,
3772 InstrItinClass itinD16, InstrItinClass itinD32,
3773 InstrItinClass itinQ16, InstrItinClass itinQ32,
3774 string OpcodeStr, string Dt, SDPatternOperator ShOp> {
3775 def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
3776 OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
3777 def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
3778 OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
3779 def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
3780 OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
3782 def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
3783 OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
3787 // Neon Intrinsic-Op vector operations,
3788 // element sizes of 8, 16 and 32 bits:
// (each def combines an intrinsic IntOp with an SDNode OpNode — see the
// per-def argument lists)
3789 multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3790 InstrItinClass itinD, InstrItinClass itinQ,
3791 string OpcodeStr, string Dt, SDPatternOperator IntOp,
3793 // 64-bit vector types.
3794 def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
3795 OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
3796 def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
3797 OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
3798 def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
3799 OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;
3801 // 128-bit vector types.
3802 def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
3803 OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
3804 def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
3805 OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
3806 def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
3807 OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
3810 // Neon 3-argument intrinsics,
3811 // element sizes of 16 and 32 bits:
3812 multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
3813 InstrItinClass itinD16, InstrItinClass itinD32,
3814 InstrItinClass itinQ16, InstrItinClass itinQ32,
3815 string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3816 // 64-bit vector types.
3817 def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
3818 OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
3819 def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
3820 OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
3822 // 128-bit vector types.
3823 def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
3824 OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
3825 def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
3826 OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
3829 // element sizes of 8, 16 and 32 bits:
// (inherits the 16/32-bit defs from N3VInt3_HS and adds only the 8-bit ones)
3830 multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3831 InstrItinClass itinD16, InstrItinClass itinD32,
3832 InstrItinClass itinQ16, InstrItinClass itinQ32,
3833 string OpcodeStr, string Dt, SDPatternOperator IntOp>
3834 :N3VInt3_HS <op24, op23, op11_8, op4, itinD16, itinD32,
3835 itinQ16, itinQ32, OpcodeStr, Dt, IntOp>{
3836 // 64-bit vector types.
3837 def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
3838 OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
3839 // 128-bit vector types.
3840 def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
3841 OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
3844 // Neon Long Multiply-Op vector operations,
3845 // element sizes of 8, 16 and 32 bits:
// (double-width results: MulOp produces the widened product, OpNode folds
// it with the accumulator)
3846 multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3847 InstrItinClass itin16, InstrItinClass itin32,
3848 string OpcodeStr, string Dt, SDNode MulOp,
3850 def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
3851 !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
3852 def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
3853 !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
3854 def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
3855 !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
// ....with lane ("SL") scalar variants, using the VMAC itineraries:
3858 multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
3859 string Dt, SDNode MulOp, SDNode OpNode> {
3860 def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
3861 !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
3862 def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
3863 !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
3867 // Neon Long 3-argument intrinsics.
3869 // First with only element sizes of 16 and 32 bits:
3870 multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
3871 InstrItinClass itin16, InstrItinClass itin32,
3872 string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3873 def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
3874 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
3875 def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
3876 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
// ....with lane ("SL") scalar variants, using the VMAC itineraries:
3879 multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
3880 string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3881 def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
3882 OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
3883 def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
3884 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
3887 // ....then also with element size of 8 bits:
3888 multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3889 InstrItinClass itin16, InstrItinClass itin32,
3890 string OpcodeStr, string Dt, SDPatternOperator IntOp>
3891 : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
3892 def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
3893 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
3896 // ....with explicit extend (VABAL).
3897 multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3898 InstrItinClass itin, string OpcodeStr, string Dt,
3899 SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> {
3900 def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
3901 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
3902 IntOp, ExtOp, OpNode>;
3903 def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
3904 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
3905 IntOp, ExtOp, OpNode>;
3906 def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
3907 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
3908 IntOp, ExtOp, OpNode>;
3912 // Neon Pairwise long 2-register intrinsics,
3913 // element sizes of 8, 16 and 32 bits:
// (pairwise: result has half the lanes at double the element width,
// e.g. v8i8 source -> v4i16 result)
3914 multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3915 bits<5> op11_7, bit op4,
3916 string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3917 // 64-bit vector types.
3918 def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3919 OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
3920 def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3921 OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
3922 def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3923 OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
3925 // 128-bit vector types.
3926 def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3927 OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
3928 def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3929 OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
3930 def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3931 OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
3935 // Neon Pairwise long 2-register accumulate intrinsics,
3936 // element sizes of 8, 16 and 32 bits:
3937 multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3938 bits<5> op11_7, bit op4,
3939 string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3940 // 64-bit vector types.
3941 def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3942 OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
3943 def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3944 OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
3945 def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3946 OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
3948 // 128-bit vector types.
3949 def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3950 OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
3951 def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3952 OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
3953 def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3954 OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
3958 // Neon 2-register vector shift by immediate,
3959 // with f of either N2RegVShLFrm or N2RegVShRFrm
3960 // element sizes of 8, 16, 32 and 64 bits:
// (the imm6 field at Inst{21-16} encodes element size together with the
// shift amount; each per-size def constrains its leading bits below)
3961 multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3962 InstrItinClass itin, string OpcodeStr, string Dt,
3964 // 64-bit vector types.
3965 def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3966 OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
3967 let Inst{21-19} = 0b001; // imm6 = 001xxx
3969 def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3970 OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
3971 let Inst{21-20} = 0b01; // imm6 = 01xxxx
3973 def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3974 OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
3975 let Inst{21} = 0b1; // imm6 = 1xxxxx
3977 def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
3978 OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
3981 // 128-bit vector types.
3982 def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3983 OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
3984 let Inst{21-19} = 0b001; // imm6 = 001xxx
3986 def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3987 OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
3988 let Inst{21-20} = 0b01; // imm6 = 01xxxx
3990 def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3991 OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
3992 let Inst{21} = 0b1; // imm6 = 1xxxxx
3994 def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
3995 OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
// Right-shift variant: same structure, but uses the N2RegVShRFrm format and
// per-size shr_imm8/16/32/64 immediate operands instead of a plain i32imm.
3998 multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3999 InstrItinClass itin, string OpcodeStr, string Dt,
4000 string baseOpc, SDNode OpNode> {
4001 // 64-bit vector types.
4002 def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
4003 OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
4004 let Inst{21-19} = 0b001; // imm6 = 001xxx
4006 def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
4007 OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
4008 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4010 def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
4011 OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
4012 let Inst{21} = 0b1; // imm6 = 1xxxxx
4014 def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
4015 OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
4018 // 128-bit vector types.
4019 def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
4020 OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
4021 let Inst{21-19} = 0b001; // imm6 = 001xxx
4023 def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
4024 OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
4025 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4027 def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
4028 OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
4029 let Inst{21} = 0b1; // imm6 = 1xxxxx
4031 def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
4032 OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
4036 // Neon Shift-Accumulate vector operations,
4037 // element sizes of 8, 16, 32 and 64 bits:
// (right-shift immediates via shr_imm*; bits of imm6 constrained per size,
// same scheme as N2VShR_QHSD above)
4038 multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4039 string OpcodeStr, string Dt, SDNode ShOp> {
4040 // 64-bit vector types.
4041 def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
4042 OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
4043 let Inst{21-19} = 0b001; // imm6 = 001xxx
4045 def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
4046 OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
4047 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4049 def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
4050 OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
4051 let Inst{21} = 0b1; // imm6 = 1xxxxx
4053 def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
4054 OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
4057 // 128-bit vector types.
4058 def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
4059 OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
4060 let Inst{21-19} = 0b001; // imm6 = 001xxx
4062 def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
4063 OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
4064 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4066 def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
4067 OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
4068 let Inst{21} = 0b1; // imm6 = 1xxxxx
4070 def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
4071 OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
4075 // Neon Shift-Insert vector operations,
4076 // with f of either N2RegVShLFrm or N2RegVShRFrm
4077 // element sizes of 8, 16, 32 and 64 bits:
// (left variant: i32imm shift operand, NEONvsliImm node)
4078 multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4080 // 64-bit vector types.
4081 def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
4082 N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsliImm> {
4083 let Inst{21-19} = 0b001; // imm6 = 001xxx
4085 def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
4086 N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsliImm> {
4087 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4089 def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
4090 N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsliImm> {
4091 let Inst{21} = 0b1; // imm6 = 1xxxxx
4093 def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
4094 N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsliImm>;
4097 // 128-bit vector types.
4098 def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
4099 N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsliImm> {
4100 let Inst{21-19} = 0b001; // imm6 = 001xxx
4102 def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
4103 N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsliImm> {
4104 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4106 def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
4107 N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsliImm> {
4108 let Inst{21} = 0b1; // imm6 = 1xxxxx
4110 def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
4111 N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsliImm>;
// Right variant: shr_imm* shift operands and the NEONvsriImm node.
4114 multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4116 // 64-bit vector types.
4117 def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
4118 N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsriImm> {
4119 let Inst{21-19} = 0b001; // imm6 = 001xxx
4121 def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
4122 N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsriImm> {
4123 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4125 def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
4126 N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsriImm> {
4127 let Inst{21} = 0b1; // imm6 = 1xxxxx
4129 def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
4130 N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsriImm>;
4133 // 128-bit vector types.
4134 def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
4135 N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsriImm> {
4136 let Inst{21-19} = 0b001; // imm6 = 001xxx
4138 def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
4139 N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsriImm> {
4140 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4142 def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
4143 N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsriImm> {
4144 let Inst{21} = 0b1; // imm6 = 1xxxxx
4146 def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
4147 N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsriImm>;
4151 // Neon Shift Long operations,
4152 // element sizes of 8, 16, 32 bits:
// (widening: e.g. v8i8 source -> v8i16 result; shift amount limited to
// source-width-minus-one via imm1_7/imm1_15/imm1_31)
4153 multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
4154 bit op4, string OpcodeStr, string Dt,
4155 SDPatternOperator OpNode> {
4156 def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
4157 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
4158 let Inst{21-19} = 0b001; // imm6 = 001xxx
4160 def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
4161 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
4162 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4164 def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
4165 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
4166 let Inst{21} = 0b1; // imm6 = 1xxxxx
4170 // Neon Shift Narrow operations,
4171 // element sizes of 16, 32, 64 bits:
// (narrowing: the Dt suffix names the *source* element width while the
// result is half-width, e.g. "...16" yields v8i8 from v8i16)
4172 multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
4173 bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
4174 SDPatternOperator OpNode> {
4175 def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
4176 OpcodeStr, !strconcat(Dt, "16"),
4177 v8i8, v8i16, shr_imm8, OpNode> {
4178 let Inst{21-19} = 0b001; // imm6 = 001xxx
4180 def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
4181 OpcodeStr, !strconcat(Dt, "32"),
4182 v4i16, v4i32, shr_imm16, OpNode> {
4183 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4185 def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
4186 OpcodeStr, !strconcat(Dt, "64"),
4187 v2i32, v2i64, shr_imm32, OpNode> {
4188 let Inst{21} = 0b1; // imm6 = 1xxxxx
4192 //===----------------------------------------------------------------------===//
4193 // Instruction Definitions.
4194 //===----------------------------------------------------------------------===//
4196 // Vector Add Operations.
4198 // VADD : Vector Add (integer and floating-point)
4199 defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
4201 def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
4202 v2f32, v2f32, fadd, 1>;
4203 def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
4204 v4f32, v4f32, fadd, 1>;
// f16 forms are gated on full FP16 support:
4205 def VADDhd : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
4206 v4f16, v4f16, fadd, 1>,
4207 Requires<[HasNEON,HasFullFP16]>;
4208 def VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
4209 v8f16, v8f16, fadd, 1>,
4210 Requires<[HasNEON,HasFullFP16]>;
4211 // VADDL : Vector Add Long (Q = D + D)
4212 defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
4213 "vaddl", "s", add, sext, 1>;
4214 defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
4215 "vaddl", "u", add, zext, 1>;
4216 // VADDW : Vector Add Wide (Q = Q + D)
4217 defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
4218 defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
4219 // VHADD : Vector Halving Add
4220 defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
4221 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4222 "vhadd", "s", int_arm_neon_vhadds, 1>;
4223 defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
4224 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4225 "vhadd", "u", int_arm_neon_vhaddu, 1>;
4226 // VRHADD : Vector Rounding Halving Add
4227 defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
4228 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4229 "vrhadd", "s", int_arm_neon_vrhadds, 1>;
4230 defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
4231 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4232 "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
4233 // VQADD : Vector Saturating Add
4234 defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
4235 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4236 "vqadd", "s", int_arm_neon_vqadds, 1>;
4237 defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
4238 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4239 "vqadd", "u", int_arm_neon_vqaddu, 1>;
4240 // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
4241 defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
4242 // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
4243 defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
4244 int_arm_neon_vraddhn, 1>;
// Select VADDHN for trunc(add(Q,Q) >>u half-width) — the "high half" of
// the widened sum:
4246 let Predicates = [HasNEON] in {
4247 def : Pat<(v8i8 (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
4248 (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
4249 def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
4250 (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
4251 def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
4252 (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
4255 // Vector Multiply Operations.
4257 // VMUL : Vector Multiply (integer, polynomial and floating-point)
4258 defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
4259 IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
4260 def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
4261 "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
4262 def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
4263 "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
4264 def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
4265 v2f32, v2f32, fmul, 1>;
4266 def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
4267 v4f32, v4f32, fmul, 1>;
4268 def VMULhd : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
4269 v4f16, v4f16, fmul, 1>,
4270 Requires<[HasNEON,HasFullFP16]>;
4271 def VMULhq : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
4272 v8f16, v8f16, fmul, 1>,
4273 Requires<[HasNEON,HasFullFP16]>;
// Lane-indexed ("sl") multiply forms:
4274 defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
4275 def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
4276 def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
4278 def VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
4279 Requires<[HasNEON,HasFullFP16]>;
4280 def VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
4282 Requires<[HasNEON,HasFullFP16]>;
// Fold a duplicated Q-register lane into the lane-indexed forms by
// extracting the D subregister that contains the lane:
4284 let Predicates = [HasNEON] in {
4285 def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
4286 (v8i16 (ARMvduplane (v8i16 QPR:$src2), imm:$lane)))),
4287 (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
4288 (v4i16 (EXTRACT_SUBREG QPR:$src2,
4289 (DSubReg_i16_reg imm:$lane))),
4290 (SubReg_i16_lane imm:$lane)))>;
4291 def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
4292 (v4i32 (ARMvduplane (v4i32 QPR:$src2), imm:$lane)))),
4293 (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
4294 (v2i32 (EXTRACT_SUBREG QPR:$src2,
4295 (DSubReg_i32_reg imm:$lane))),
4296 (SubReg_i32_lane imm:$lane)))>;
4297 def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
4298 (v4f32 (ARMvduplane (v4f32 QPR:$src2), imm:$lane)))),
4299 (v4f32 (VMULslfq (v4f32 QPR:$src1),
4300 (v2f32 (EXTRACT_SUBREG QPR:$src2,
4301 (DSubReg_i32_reg imm:$lane))),
4302 (SubReg_i32_lane imm:$lane)))>;
4303 def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
4304 (v8f16 (ARMvduplane (v8f16 QPR:$src2), imm:$lane)))),
4305 (v8f16 (VMULslhq(v8f16 QPR:$src1),
4306 (v4f16 (EXTRACT_SUBREG QPR:$src2,
4307 (DSubReg_i16_reg imm:$lane))),
4308 (SubReg_i16_lane imm:$lane)))>;
// Multiply by a scalar duplicated across all lanes: insert the scalar into
// lane 0 of an undef vector and use the lane-indexed multiply.
4310 def : Pat<(v2f32 (fmul DPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
4312 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
4314 def : Pat<(v4f16 (fmul DPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
4316 (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
4318 def : Pat<(v4f32 (fmul QPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
4320 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
4322 def : Pat<(v8f16 (fmul QPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
4324 (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
4328 // VQDMULH : Vector Saturating Doubling Multiply Returning High Half
4329 defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
4330 IIC_VMULi16Q, IIC_VMULi32Q,
4331 "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
4332 defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
4333 IIC_VMULi16Q, IIC_VMULi32Q,
4334 "vqdmulh", "s", int_arm_neon_vqdmulh>;
// Fold a duplicated Q-register lane into the lane-indexed VQDMULHsl forms:
4336 let Predicates = [HasNEON] in {
4337 def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
4338 (v8i16 (ARMvduplane (v8i16 QPR:$src2),
4340 (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
4341 (v4i16 (EXTRACT_SUBREG QPR:$src2,
4342 (DSubReg_i16_reg imm:$lane))),
4343 (SubReg_i16_lane imm:$lane)))>;
4344 def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
4345 (v4i32 (ARMvduplane (v4i32 QPR:$src2),
4347 (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
4348 (v2i32 (EXTRACT_SUBREG QPR:$src2,
4349 (DSubReg_i32_reg imm:$lane))),
4350 (SubReg_i32_lane imm:$lane)))>;
4353 // VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
4354 defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
4355 IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
4356 "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
4357 defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
4358 IIC_VMULi16Q, IIC_VMULi32Q,
4359 "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
// Same lane-folding, for the rounding variant:
4361 let Predicates = [HasNEON] in {
4362 def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
4363 (v8i16 (ARMvduplane (v8i16 QPR:$src2),
4365 (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
4366 (v4i16 (EXTRACT_SUBREG QPR:$src2,
4367 (DSubReg_i16_reg imm:$lane))),
4368 (SubReg_i16_lane imm:$lane)))>;
4369 def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
4370 (v4i32 (ARMvduplane (v4i32 QPR:$src2),
4372 (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
4373 (v2i32 (EXTRACT_SUBREG QPR:$src2,
4374 (DSubReg_i32_reg imm:$lane))),
4375 (SubReg_i32_lane imm:$lane)))>;
4378 // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
// (these defs share a Thumb2 post-encoder method and a NEON decoder
// namespace via the enclosing 'let')
4379 let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
4380 DecoderNamespace = "NEONData" in {
4381 defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
4382 "vmull", "s", NEONvmulls, 1>;
4383 defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
4384 "vmull", "u", NEONvmullu, 1>;
4385 def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
4386 v8i16, v8i8, int_arm_neon_vmullp, 1>;
// p64 form requires ARMv8 + Crypto extension:
4387 def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
4388 "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
4389 Requires<[HasV8, HasCrypto]>;
4391 defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
4392 defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
4394 // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
4395 defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
4396 "vqdmull", "s", int_arm_neon_vqdmull, 1>;
4397 defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
4398 "vqdmull", "s", int_arm_neon_vqdmull>;
4400 // Vector Multiply-Accumulate and Multiply-Subtract Operations.
4402 // VMLA : Vector Multiply Accumulate (integer and floating-point)
4403 defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4404 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
// FP forms are only selected when separate VMLx is preferred (UseFPVMLx):
4405 def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
4406 v2f32, fmul_su, fadd_mlx>,
4407 Requires<[HasNEON, UseFPVMLx]>;
4408 def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
4409 v4f32, fmul_su, fadd_mlx>,
4410 Requires<[HasNEON, UseFPVMLx]>;
4411 def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
4412 v4f16, fmul_su, fadd_mlx>,
4413 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4414 def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
4415 v8f16, fmul_su, fadd_mlx>,
4416 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
// Lane-indexed ("sl") accumulate forms:
4417 defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
4418 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
4419 def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
4420 v2f32, fmul_su, fadd_mlx>,
4421 Requires<[HasNEON, UseFPVMLx]>;
4422 def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
4423 v4f32, v2f32, fmul_su, fadd_mlx>,
4424 Requires<[HasNEON, UseFPVMLx]>;
4425 def VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
4427 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4428 def VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
4429 v8f16, v4f16, fmul, fadd>,
4430 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
// Fold a duplicated Q-register lane into the lane-indexed VMLAsl forms:
4432 let Predicates = [HasNEON] in {
4433 def : Pat<(v8i16 (add (v8i16 QPR:$src1),
4434 (mul (v8i16 QPR:$src2),
4435 (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
4436 (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
4437 (v4i16 (EXTRACT_SUBREG QPR:$src3,
4438 (DSubReg_i16_reg imm:$lane))),
4439 (SubReg_i16_lane imm:$lane)))>;
4441 def : Pat<(v4i32 (add (v4i32 QPR:$src1),
4442 (mul (v4i32 QPR:$src2),
4443 (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
4444 (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
4445 (v2i32 (EXTRACT_SUBREG QPR:$src3,
4446 (DSubReg_i32_reg imm:$lane))),
4447 (SubReg_i32_lane imm:$lane)))>;
4450 def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
4451 (fmul_su (v4f32 QPR:$src2),
4452 (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
4453 (v4f32 (VMLAslfq (v4f32 QPR:$src1),
4455 (v2f32 (EXTRACT_SUBREG QPR:$src3,
4456 (DSubReg_i32_reg imm:$lane))),
4457 (SubReg_i32_lane imm:$lane)))>,
4458 Requires<[HasNEON, UseFPVMLx]>;
4460 // VMLAL : Vector Multiply Accumulate Long (Q += D * D)
4461 defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
4462 "vmlal", "s", NEONvmulls, add>;
4463 defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
4464 "vmlal", "u", NEONvmullu, add>;
4466 defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
4467 defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
4469 let Predicates = [HasNEON, HasV8_1a] in {
4470 // v8.1a Neon Rounding Double Multiply-Op vector operations,
4471 // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate Long
4473 defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
4474 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
4476 def : Pat<(v4i16 (int_arm_neon_vqadds
4478 (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
4479 (v4i16 DPR:$Vm))))),
4480 (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4481 def : Pat<(v2i32 (int_arm_neon_vqadds
4483 (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
4484 (v2i32 DPR:$Vm))))),
4485 (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4486 def : Pat<(v8i16 (int_arm_neon_vqadds
4488 (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
4489 (v8i16 QPR:$Vm))))),
4490 (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4491 def : Pat<(v4i32 (int_arm_neon_vqadds
4493 (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
4494 (v4i32 QPR:$Vm))))),
4495 (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4497 defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
4498 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
4500 def : Pat<(v4i16 (int_arm_neon_vqadds
4502 (v4i16 (int_arm_neon_vqrdmulh
4504 (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
4506 (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
4508 def : Pat<(v2i32 (int_arm_neon_vqadds
4510 (v2i32 (int_arm_neon_vqrdmulh
4512 (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
4514 (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4516 def : Pat<(v8i16 (int_arm_neon_vqadds
4518 (v8i16 (int_arm_neon_vqrdmulh
4520 (v8i16 (ARMvduplane (v8i16 QPR:$src3),
4522 (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
4524 (v4i16 (EXTRACT_SUBREG
4526 (DSubReg_i16_reg imm:$lane))),
4527 (SubReg_i16_lane imm:$lane)))>;
4528 def : Pat<(v4i32 (int_arm_neon_vqadds
4530 (v4i32 (int_arm_neon_vqrdmulh
4532 (v4i32 (ARMvduplane (v4i32 QPR:$src3),
4534 (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
4536 (v2i32 (EXTRACT_SUBREG
4538 (DSubReg_i32_reg imm:$lane))),
4539 (SubReg_i32_lane imm:$lane)))>;
4541 // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract Long
4543 defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
4544 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
4546 def : Pat<(v4i16 (int_arm_neon_vqsubs
4548 (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
4549 (v4i16 DPR:$Vm))))),
4550 (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4551 def : Pat<(v2i32 (int_arm_neon_vqsubs
4553 (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
4554 (v2i32 DPR:$Vm))))),
4555 (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4556 def : Pat<(v8i16 (int_arm_neon_vqsubs
4558 (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
4559 (v8i16 QPR:$Vm))))),
4560 (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4561 def : Pat<(v4i32 (int_arm_neon_vqsubs
4563 (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
4564 (v4i32 QPR:$Vm))))),
4565 (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4567 defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
4568 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
4570 def : Pat<(v4i16 (int_arm_neon_vqsubs
4572 (v4i16 (int_arm_neon_vqrdmulh
4574 (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
4576 (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
4577 def : Pat<(v2i32 (int_arm_neon_vqsubs
4579 (v2i32 (int_arm_neon_vqrdmulh
4581 (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
4583 (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4585 def : Pat<(v8i16 (int_arm_neon_vqsubs
4587 (v8i16 (int_arm_neon_vqrdmulh
4589 (v8i16 (ARMvduplane (v8i16 QPR:$src3),
4591 (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
4593 (v4i16 (EXTRACT_SUBREG
4595 (DSubReg_i16_reg imm:$lane))),
4596 (SubReg_i16_lane imm:$lane)))>;
4597 def : Pat<(v4i32 (int_arm_neon_vqsubs
4599 (v4i32 (int_arm_neon_vqrdmulh
4601 (v4i32 (ARMvduplane (v4i32 QPR:$src3),
4603 (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
4605 (v2i32 (EXTRACT_SUBREG
4607 (DSubReg_i32_reg imm:$lane))),
4608 (SubReg_i32_lane imm:$lane)))>;
4610 // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
// Defined with null_frag; the actual selection is done by the explicit
// int_arm_neon_vqadds/vqdmull patterns that follow.
4611 defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4612 "vqdmlal", "s", null_frag>;
4613 defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;
4615 let Predicates = [HasNEON] in {
// Match saturating-add of a saturating doubling multiply-long as a
// single VQDMLAL instruction.
4616 def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
4617 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4618 (v4i16 DPR:$Vm))))),
4619 (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4620 def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
4621 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4622 (v2i32 DPR:$Vm))))),
4623 (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4624 def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
4625 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4626 (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
4628 (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
4629 def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
4630 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4631 (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
4633 (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
4636 // VMLS : Vector Multiply Subtract (integer and floating-point)
// Integer forms for all Q/H/S sizes, plus f32 D- and Q-register forms.
// The FP forms are only used when the subtarget prefers VMLA/VMLS
// (UseFPVMLx) and use the no-intermediate-rounding fmul/fsub fragments.
4637 defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4638 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
4639 def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
4640 v2f32, fmul_su, fsub_mlx>,
4641 Requires<[HasNEON, UseFPVMLx]>;
4642 def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
4643 v4f32, fmul_su, fsub_mlx>,
4644 Requires<[HasNEON, UseFPVMLx]>;
4645 def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
4647 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4648 def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
4650 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
// By-scalar (lane) forms of VMLS: integer H/S sizes and f32 D/Q forms.
4651 defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
4652 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
4653 def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
4654 v2f32, fmul_su, fsub_mlx>,
4655 Requires<[HasNEON, UseFPVMLx]>;
4656 def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
4657 v4f32, v2f32, fmul_su, fsub_mlx>,
4658 Requires<[HasNEON, UseFPVMLx]>;
4659 def VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
4661 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4662 def VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
4663 v8f16, v4f16, fmul, fsub>,
4664 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4666 let Predicates = [HasNEON] in {
// Select integer multiply-subtract with a duplicated lane on 128-bit
// vectors via the D-register lane form of VMLS (mirrors the VMLA
// patterns above): narrow $src3 to the D subregister holding the lane
// and remap the lane index.
4667 def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
4668 (mul (v8i16 QPR:$src2),
4669 (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
4670 (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
4671 (v4i16 (EXTRACT_SUBREG QPR:$src3,
4672 (DSubReg_i16_reg imm:$lane))),
4673 (SubReg_i16_lane imm:$lane)))>;
4675 def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
4676 (mul (v4i32 QPR:$src2),
4677 (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
4678 (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
4679 (v2i32 (EXTRACT_SUBREG QPR:$src3,
4680 (DSubReg_i32_reg imm:$lane))),
4681 (SubReg_i32_lane imm:$lane)))>;
// Same lane folding for the f32 Q-register multiply-subtract; gated on
// UseFPVMLx like the instruction itself.
4684 def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
4685 (fmul_su (v4f32 QPR:$src2),
4686 (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
4687 (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
4688 (v2f32 (EXTRACT_SUBREG QPR:$src3,
4689 (DSubReg_i32_reg imm:$lane))),
4690 (SubReg_i32_lane imm:$lane)))>,
4691 Requires<[HasNEON, UseFPVMLx]>;
4693 // VMLSL : Vector Multiply Subtract Long (Q -= D * D)
// Signed/unsigned widening multiply-subtract and their lane variants.
4694 defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
4695 "vmlsl", "s", NEONvmulls, sub>;
4696 defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
4697 "vmlsl", "u", NEONvmullu, sub>;
4699 defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
4700 defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
4702 // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
// null_frag here as well; selection is via the explicit patterns below.
4703 defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
4704 "vqdmlsl", "s", null_frag>;
4705 defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;
4707 let Predicates = [HasNEON] in {
// Match saturating-subtract of a saturating doubling multiply-long as a
// single VQDMLSL instruction.
4708 def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
4709 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4710 (v4i16 DPR:$Vm))))),
4711 (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4712 def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
4713 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4714 (v2i32 DPR:$Vm))))),
4715 (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4716 def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
4717 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4718 (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
4720 (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
4721 def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
4722 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4723 (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
4725 (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
4728 // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
// f32 fused multiply-add, available with VFP4 when fused MACs are enabled.
4729 def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
4730 v2f32, fmul_su, fadd_mlx>,
4731 Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4733 def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
4734 v4f32, fmul_su, fadd_mlx>,
4735 Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4736 def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
4738 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4740 def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
4742 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4744 // Fused Vector Multiply Subtract (floating-point)
// f32 fused multiply-subtract, D and Q register forms.
4745 def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
4746 v2f32, fmul_su, fsub_mlx>,
4747 Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4748 def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
4749 v4f32, fmul_su, fsub_mlx>,
4750 Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4751 def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
4753 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4754 def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
4756 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4758 // Match @llvm.fma.* intrinsics
// Note the operand order swap: the fma node is (Vn, Vm, accumulator)
// while the instructions take the accumulator ($src1) first.
4759 def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
4760 (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4761 Requires<[HasNEON,HasFullFP16]>;
4762 def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
4763 (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4764 Requires<[HasNEON,HasFullFP16]>;
4765 def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
4766 (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4767 Requires<[HasNEON,HasVFP4]>;
4768 def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
4769 (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4770 Requires<[HasNEON,HasVFP4]>;
// fma with a negated multiplicand selects the fused multiply-subtract.
4771 def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
4772 (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4773 Requires<[HasNEON,HasVFP4]>;
4774 def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
4775 (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4776 Requires<[HasNEON,HasVFP4]>;
4778 // ARMv8.2a dot product instructions.
4779 // We put them in the VFPV8 decoder namespace because the ARM and Thumb
4780 // encodings are the same and thus no further bit twiddling is necessary
4781 // in the disassembler.
4782 class VDOT<bit op6, bit op4, RegisterClass RegTy, string Asm, string AsmTy,
4783 ValueType AccumTy, ValueType InputTy,
4784 SDPatternOperator OpNode> :
4785 N3Vnp<0b11000, 0b10, 0b1101, op6, op4, (outs RegTy:$dst),
4786 (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD,
4788 [(set (AccumTy RegTy:$dst),
4789 (OpNode (AccumTy RegTy:$Vd),
4790 (InputTy RegTy:$Vn),
4791 (InputTy RegTy:$Vm)))]> {
4792 let Predicates = [HasDotProd];
4793 let DecoderNamespace = "VFPV8";
4794 let Constraints = "$dst = $Vd";
// Unsigned/signed 8-bit dot product instructions accumulating into
// 32-bit lanes, D- and Q-register forms (ARMv8.2-A DotProd extension).
4797 def VUDOTD : VDOT<0, 1, DPR, "vudot", "u8", v2i32, v8i8, int_arm_neon_udot>;
4798 def VSDOTD : VDOT<0, 0, DPR, "vsdot", "s8", v2i32, v8i8, int_arm_neon_sdot>;
4799 def VUDOTQ : VDOT<1, 1, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>;
4800 def VSDOTQ : VDOT<1, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>;
4802 // Indexed dot product instructions:
4803 multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty,
4804 ValueType AccumType, ValueType InputType, SDPatternOperator OpNode,
4806 def "" : N3Vnp<0b11100, 0b10, 0b1101, Q, U, (outs Ty:$dst),
4807 (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
4808 N3RegFrm, IIC_VDOTPROD, opc, dt, []> {
4811 let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane");
4812 let Constraints = "$dst = $Vd";
4813 let Predicates = [HasDotProd];
4814 let DecoderNamespace = "VFPV8";
4818 (AccumType (OpNode (AccumType Ty:$Vd),
4820 (InputType (bitconvert (AccumType
4821 (ARMvduplane (AccumType Ty:$Vm),
4822 VectorIndex32:$lane)))))),
4823 (!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>;
// Indexed (by-lane) dot products. The final operand is the RHS
// expression used by the multiclass's selection pattern: for Q forms
// the scalar register is the low D subregister of the Q operand.
4826 defm VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR, v2i32, v8i8,
4827 int_arm_neon_udot, (v2i32 DPR_VFP2:$Vm)>;
4828 defm VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR, v2i32, v8i8,
4829 int_arm_neon_sdot, (v2i32 DPR_VFP2:$Vm)>;
4830 defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8,
4831 int_arm_neon_udot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
4832 defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8,
4833 int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
4836 // ARMv8.3 complex operations
4837 class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q,
4838 InstrItinClass itin, dag oops, dag iops,
4839 string opc, string dt, list<dag> pattern>
4840 : N3VCP8<{?,?}, {op21,s}, q, op4, oops,
4841 iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{
4843 let Inst{24-23} = rot;
4846 class BaseN3VCP8ComplexOdd<bit op23, bit op21, bit op4, bit s, bit q,
4847 InstrItinClass itin, dag oops, dag iops, string opc,
4848 string dt, list<dag> pattern>
4849 : N3VCP8<{?,op23}, {op21,s}, q, op4, oops,
4850 iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> {
4855 class BaseN3VCP8ComplexTiedLane32<bit op4, bit s, bit q, InstrItinClass itin,
4856 dag oops, dag iops, string opc, string dt,
4858 : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
4859 "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
4863 let Inst{21-20} = rot;
4867 class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q, InstrItinClass itin,
4868 dag oops, dag iops, string opc, string dt,
4870 : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
4871 "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
4875 let Inst{21-20} = rot;
4876 let Inst{5} = Vm{4};
4877 // This is needed because the lane operand does not have any bits in the
4878 // encoding (it only has one possible value), so we need to manually set it
4879 // to its default value.
4880 let DecoderMethod = "DecodeNEONComplexLane64Instruction";
4883 multiclass N3VCP8ComplexTied<bit op21, bit op4,
4884 string OpcodeStr, SDPatternOperator Op> {
4885 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4886 def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD, (outs DPR:$Vd),
4887 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
4888 OpcodeStr, "f16", []>;
4889 def v8f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 1, IIC_VMACQ, (outs QPR:$Vd),
4890 (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
4891 OpcodeStr, "f16", []>;
4893 let Predicates = [HasNEON,HasV8_3a] in {
4894 def v2f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 0, IIC_VMACD, (outs DPR:$Vd),
4895 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
4896 OpcodeStr, "f32", []>;
4897 def v4f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 1, IIC_VMACQ, (outs QPR:$Vd),
4898 (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
4899 OpcodeStr, "f32", []>;
4903 multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4,
4904 string OpcodeStr, SDPatternOperator Op> {
4905 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4906 def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD,
4908 (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
4909 OpcodeStr, "f16", []>;
4910 def v8f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 1, IIC_VMACQ,
4912 (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
4913 OpcodeStr, "f16", []>;
4915 let Predicates = [HasNEON,HasV8_3a] in {
4916 def v2f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 0, IIC_VMACD,
4918 (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
4919 OpcodeStr, "f32", []>;
4920 def v4f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 1, IIC_VMACQ,
4922 (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
4923 OpcodeStr, "f32", []>;
4927 // These instructions index by pairs of lanes, so the VectorIndexes are twice
4928 // as wide as the data types.
4929 multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr,
4930 SDPatternOperator Op> {
4931 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4932 def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD,
4934 (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4935 VectorIndex32:$lane, complexrotateop:$rot),
4936 OpcodeStr, "f16", []>;
4937 def v8f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 1, IIC_VMACQ,
4939 (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm,
4940 VectorIndex32:$lane, complexrotateop:$rot),
4941 OpcodeStr, "f16", []>;
4943 let Predicates = [HasNEON,HasV8_3a] in {
4944 def v2f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 0, IIC_VMACD,
4946 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
4947 complexrotateop:$rot),
4948 OpcodeStr, "f32", []>;
4949 def v4f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 1, IIC_VMACQ,
4951 (ins QPR:$src1, QPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
4952 complexrotateop:$rot),
4953 OpcodeStr, "f32", []>;
// ARMv8.3-A complex arithmetic: VCMLA (tied accumulate, plus indexed
// forms) and VCADD. null_frag: no ISel patterns are attached here.
4957 defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>;
4958 defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>;
4959 defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>;
4961 // Vector Subtract Operations.
4963 // VSUB : Vector Subtract (integer and floating-point)
// Integer subtract for all element sizes, plus f32 and f16 FP forms.
4964 defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
4965 "vsub", "i", sub, 0>;
4966 def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
4967 v2f32, v2f32, fsub, 0>;
4968 def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
4969 v4f32, v4f32, fsub, 0>;
4970 def VSUBhd : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
4971 v4f16, v4f16, fsub, 0>,
4972 Requires<[HasNEON,HasFullFP16]>;
4973 def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
4974 v8f16, v8f16, fsub, 0>,
4975 Requires<[HasNEON,HasFullFP16]>;
4976 // VSUBL : Vector Subtract Long (Q = D - D)
// Operands are sign- or zero-extended before the subtraction.
4977 defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
4978 "vsubl", "s", sub, sext, 0>;
4979 defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
4980 "vsubl", "u", sub, zext, 0>;
4981 // VSUBW : Vector Subtract Wide (Q = Q - D)
4982 defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
4983 defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
4984 // VHSUB : Vector Halving Subtract
4985 defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
4986 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4987 "vhsub", "s", int_arm_neon_vhsubs, 0>;
4988 defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
4989 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4990 "vhsub", "u", int_arm_neon_vhsubu, 0>;
4991 // VQSUB : Vector Saturating Subtract
4992 defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
4993 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4994 "vqsub", "s", int_arm_neon_vqsubs, 0>;
4995 defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
4996 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4997 "vqsub", "u", int_arm_neon_vqsubu, 0>;
4998 // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
// null_frag: VSUBHN is selected by the trunc/shift patterns below.
4999 defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
5000 // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
5001 defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
5002 int_arm_neon_vrsubhn, 0>;
5004 let Predicates = [HasNEON] in {
// Recognize trunc(sub(x, y) >>u half-width) as VSUBHN for each
// narrowing width.
5005 def : Pat<(v8i8 (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
5006 (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
5007 def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
5008 (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
5009 def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
5010 (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
5013 // Vector Comparisons.
5015 // VCEQ : Vector Compare Equal
// Integer equality compare; commutable (final template argument is 1).
5016 defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5017 IIC_VSUBi4Q, "vceq", "i", ARMvceq, 1>;
5018 def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
5020 def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
5022 def VCEQhd : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
5024 Requires<[HasNEON, HasFullFP16]>;
5025 def VCEQhq : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
5027 Requires<[HasNEON, HasFullFP16]>;
// Compare-equal-to-zero forms ("vceq.iN Vd, Vm, #0").
5029 let TwoOperandAliasConstraint = "$Vm = $Vd" in
5030 defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
5031 "$Vd, $Vm, #0", ARMvceqz>;
5033 // VCGE : Vector Compare Greater Than or Equal
// Signed and unsigned integer forms; not commutable.
5034 defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5035 IIC_VSUBi4Q, "vcge", "s", ARMvcge, 0>;
5036 defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5037 IIC_VSUBi4Q, "vcge", "u", ARMvcgeu, 0>;
5038 def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
5040 def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
5042 def VCGEhd : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
5044 Requires<[HasNEON, HasFullFP16]>;
5045 def VCGEhq : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
5047 Requires<[HasNEON, HasFullFP16]>;
// Compare-with-zero forms: >= 0 and <= 0 (signed only).
5049 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
5050 defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
5051 "$Vd, $Vm, #0", ARMvcgez>;
5052 defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
5053 "$Vd, $Vm, #0", ARMvclez>;
5056 // VCGT : Vector Compare Greater Than
// Signed and unsigned integer forms; not commutable.
5057 defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5058 IIC_VSUBi4Q, "vcgt", "s", ARMvcgt, 0>;
5059 defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5060 IIC_VSUBi4Q, "vcgt", "u", ARMvcgtu, 0>;
5061 def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
5063 def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
5065 def VCGThd : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
5067 Requires<[HasNEON, HasFullFP16]>;
5068 def VCGThq : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
5070 Requires<[HasNEON, HasFullFP16]>;
// Compare-with-zero forms: > 0 and < 0 (signed only).
5072 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
5073 defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
5074 "$Vd, $Vm, #0", ARMvcgtz>;
5075 defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
5076 "$Vd, $Vm, #0", ARMvcltz>;
5079 // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
// Compares |Vn| against |Vm|; f32 and (with FullFP16) f16, D and Q forms.
5080 def VACGEfd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
5081 "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
5082 def VACGEfq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
5083 "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
5084 def VACGEhd : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
5085 "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
5086 Requires<[HasNEON, HasFullFP16]>;
5087 def VACGEhq : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
5088 "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
5089 Requires<[HasNEON, HasFullFP16]>;
5090 // VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
5091 def VACGTfd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
5092 "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
5093 def VACGTfq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
5094 "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
5095 def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
5096 "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
5097 Requires<[HasNEON, HasFullFP16]>;
5098 def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
5099 "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>,
5100 Requires<[HasNEON, HasFullFP16]>;
5101 // VTST : Vector Test Bits
// Commutable; sets each lane to all-ones if (Vn & Vm) lane is nonzero.
5102 defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
5103 IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
// Assembler aliases: vaclt/vacle are VACGT/VACGE with the source
// operands swapped (a < b  <=>  b > a).
5105 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
5106 (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5107 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
5108 (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5109 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
5110 (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5111 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
5112 (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5113 let Predicates = [HasNEON, HasFullFP16] in {
// f16 versions of the operand-swapped vaclt/vacle aliases.
5114 def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
5115 (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5116 def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
5117 (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5118 def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
5119 (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5120 def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
5121 (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5124 // +fp16fml Floating Point Multiplication Variants
5125 let Predicates = [HasNEON, HasFP16FML], DecoderNamespace= "VFPV8" in {
5127 class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn,
5128 RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
5129 : N3VCP8<op1, op2, 1, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
5130 asm, "f16", "$Vd, $Vn, $Vm", "", []>;
5132 class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
5133 RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
5134 : N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
5135 asm, "f16", "$Vd, $Vn, $Vm", "", []>;
5137 // Vd, Vs, Vs[0-15], Idx[0-1]
5138 class VFMD<string opc, string type, bits<2> S>
5139 : N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd),
5140 (ins SPR:$Vn, SPR_8:$Vm, VectorIndex32:$idx),
5141 IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
5144 let Inst{19-16} = Vn{4-1};
5145 let Inst{7} = Vn{0};
5146 let Inst{5} = Vm{0};
5147 let Inst{2-0} = Vm{3-1};
5150 // Vq, Vd, Vd[0-7], Idx[0-3]
5151 class VFMQ<string opc, string type, bits<2> S>
5152 : N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd),
5153 (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
5154 IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
5156 let Inst{5} = idx{1};
5157 let Inst{3} = idx{0};
5160 let hasNoSchedulingInfo = 1 in {
// FP16 fused multiply-accumulate/subtract long (+fp16fml): plain
// register forms and the by-index (lane) VFMD/VFMQ forms.
5162 def VFMALD : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
5163 def VFMSLD : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
5164 def VFMALQ : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
5165 def VFMSLQ : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;
5166 def VFMALDI : VFMD<"vfmal", "f16", 0b00>;
5167 def VFMSLDI : VFMD<"vfmsl", "f16", 0b01>;
5168 def VFMALQI : VFMQ<"vfmal", "f16", 0b00>;
5169 def VFMSLQI : VFMQ<"vfmsl", "f16", 0b01>;
5171 } // HasNEON, HasFP16FML
// Two-operand aliases: "vaclt Vd, Vm" uses Vd as the implicit first
// source (still swapped relative to VACGT/VACGE).
5174 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
5175 (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5176 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
5177 (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5178 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
5179 (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5180 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
5181 (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5182 let Predicates = [HasNEON, HasFullFP16] in {
// f16 versions of the two-operand vaclt/vacle aliases.
5183 def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
5184 (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5185 def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
5186 (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5187 def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
5188 (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5189 def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
5190 (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5193 // Vector Bitwise Operations.
// Bitwise-NOT pattern fragments: xor with an all-ones vector, for
// 64-bit (vnotd) and 128-bit (vnotq) operands.
5195 def vnotd : PatFrag<(ops node:$in),
5196 (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
5197 def vnotq : PatFrag<(ops node:$in),
5198 (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
5201 // VAND : Vector Bitwise AND
// The bitwise ops are typed as v2i32/v4i32; other vector types are
// handled via bitcasts. All three are commutable.
5202 def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
5203 v2i32, v2i32, and, 1>;
5204 def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
5205 v4i32, v4i32, and, 1>;
5207 // VEOR : Vector Bitwise Exclusive OR
5208 def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
5209 v2i32, v2i32, xor, 1>;
5210 def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
5211 v4i32, v4i32, xor, 1>;
5213 // VORR : Vector Bitwise OR
5214 def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
5215 v2i32, v2i32, or, 1>;
5216 def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
5217 v4i32, v4i32, or, 1>;
5219 def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
5220 (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
5222 "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
5224 (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
5225 let Inst{9} = SIMM{9};
5228 def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
5229 (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
5231 "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
5233 (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
5234 let Inst{10-9} = SIMM{10-9};
5237 def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
5238 (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
5240 "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
5242 (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
5243 let Inst{9} = SIMM{9};
5246 def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
5247 (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
5249 "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
5251 (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
5252 let Inst{10-9} = SIMM{10-9};
5256 // VBIC : Vector Bitwise Bit Clear (AND NOT)
// Selected from and(Vn, not(Vm)) via the vnotd/vnotq fragments.
5257 let TwoOperandAliasConstraint = "$Vn = $Vd" in {
5258 def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
5259 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
5260 "vbic", "$Vd, $Vn, $Vm", "",
5261 [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
5262 (vnotd DPR:$Vm))))]>;
5263 def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
5264 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
5265 "vbic", "$Vd, $Vn, $Vm", "",
5266 [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
5267 (vnotq QPR:$Vm))))]>;
5270 def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
5271 (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
5273 "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
5275 (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
5276 let Inst{9} = SIMM{9};
5279 def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
5280 (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
5282 "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
5284 (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
5285 let Inst{10-9} = SIMM{10-9};
5288 def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
5289 (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
5291 "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
5293 (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
5294 let Inst{9} = SIMM{9};
5297 def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
5298 (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
5300 "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
5302 (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
5303 let Inst{10-9} = SIMM{10-9};
5306 // VORN : Vector Bitwise OR NOT
// Selected from or(Vn, not(Vm)) via the vnotd/vnotq fragments.
5307 def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
5308 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
5309 "vorn", "$Vd, $Vn, $Vm", "",
5310 [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
5311 (vnotd DPR:$Vm))))]>;
5312 def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
5313 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
5314 "vorn", "$Vd, $Vn, $Vm", "",
5315 [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
5316 (vnotq QPR:$Vm))))]>;
5318 // VMVN : Vector Bitwise NOT (Immediate)
5320 let isReMaterializable = 1 in {
5322 def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
5323 (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
5324 "vmvn", "i16", "$Vd, $SIMM", "",
5325 [(set DPR:$Vd, (v4i16 (ARMvmvnImm timm:$SIMM)))]> {
5326 let Inst{9} = SIMM{9};
5329 def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
5330 (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
5331 "vmvn", "i16", "$Vd, $SIMM", "",
5332 [(set QPR:$Vd, (v8i16 (ARMvmvnImm timm:$SIMM)))]> {
5333 let Inst{9} = SIMM{9};
5336 def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
5337 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
5338 "vmvn", "i32", "$Vd, $SIMM", "",
5339 [(set DPR:$Vd, (v2i32 (ARMvmvnImm timm:$SIMM)))]> {
5340 let Inst{11-8} = SIMM{11-8};
5343 def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
5344 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
5345 "vmvn", "i32", "$Vd, $SIMM", "",
5346 [(set QPR:$Vd, (v4i32 (ARMvmvnImm timm:$SIMM)))]> {
5347 let Inst{11-8} = SIMM{11-8};
5351 // VMVN : Vector Bitwise NOT
// VMVN (register), 64-bit D form: bitwise NOT, selected from vnotd on v2i32.
5352 def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
5353 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
5354 "vmvn", "$Vd, $Vm", "",
5355 [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
// VMVN (register), 128-bit Q form: bitwise NOT, selected from vnotq on v4i32.
5356 def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
5357 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
5358 "vmvn", "$Vd, $Vm", "",
5359 [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
5360 let Predicates = [HasNEON] in {
5361 def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
5362 def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
5365 // VBSL : Vector Bitwise Select
5366 def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
5367 (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5368 N3RegFrm, IIC_VCNTiD,
5369 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5371 (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
5372 let Predicates = [HasNEON] in {
5373 def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
5374 (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
5375 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5376 def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
5377 (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
5378 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5379 def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
5380 (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
5381 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5382 def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
5383 (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
5384 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5385 def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
5386 (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
5387 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5389 def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
5390 (and DPR:$Vm, (vnotd DPR:$Vd)))),
5391 (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
5393 def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
5394 (and DPR:$Vm, (vnotd DPR:$Vd)))),
5395 (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
5398 def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
5399 (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5400 N3RegFrm, IIC_VCNTiQ,
5401 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5403 (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
5405 let Predicates = [HasNEON] in {
5406 def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
5407 (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
5408 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5409 def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
5410 (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
5411 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5412 def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
5413 (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
5414 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5415 def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
5416 (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
5417 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5418 def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
5419 (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
5420 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5422 def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
5423 (and QPR:$Vm, (vnotq QPR:$Vd)))),
5424 (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
5425 def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
5426 (and QPR:$Vm, (vnotq QPR:$Vd)))),
5427 (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
5430 // VBIF : Vector Bitwise Insert if False
5431 // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
5432 // FIXME: This instruction's encoding MAY NOT BE correct.
5433 def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
5434 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5435 N3RegFrm, IIC_VBINiD,
5436 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5438 def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
5439 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5440 N3RegFrm, IIC_VBINiQ,
5441 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5444 // VBIT : Vector Bitwise Insert if True
5445 // like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
5446 // FIXME: This instruction's encoding MAY NOT BE correct.
5447 def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
5448 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5449 N3RegFrm, IIC_VBINiD,
5450 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5452 def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
5453 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5454 N3RegFrm, IIC_VBINiQ,
5455 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5458 // VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
5459 // for equivalent operations with different register constraints; it just
5462 // Vector Absolute Differences.
5464 // VABD : Vector Absolute Difference
// Integer VABD, signed and unsigned, over the QHS element sizes
// (multiclass expands D and Q register forms for each size).
5465 defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
5466 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5467 "vabd", "s", int_arm_neon_vabds, 1>;
5468 defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
5469 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5470 "vabd", "u", int_arm_neon_vabdu, 1>;
// Floating-point VABD; note the f32/f16 forms are lowered via the same
// int_arm_neon_vabds intrinsic as the signed-integer variant.
5471 def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
5472 "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
5473 def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
5474 "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
// Half-precision forms are gated on HasFullFP16.
5475 def VABDhd : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
5476 "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
5477 Requires<[HasNEON, HasFullFP16]>;
5478 def VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
5479 "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
5480 Requires<[HasNEON, HasFullFP16]>;
5482 // VABDL : Vector Absolute Difference Long (Q = | D - D |)
// Widening forms: the zext operand marks how the narrow inputs are extended.
5483 defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
5484 "vabdl", "s", int_arm_neon_vabds, zext, 1>;
5485 defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
5486 "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
5488 let Predicates = [HasNEON] in {
5489 def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
5490 (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
5491 def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
5492 (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
5495 // ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
5496 // shift/xor pattern for ABS.
5499 PatFrag<(ops node:$in1, node:$in2, node:$shift),
5500 (ARMvshrsImm (sub (zext node:$in1),
5501 (zext node:$in2)), (i32 $shift))>;
5503 let Predicates = [HasNEON] in {
5504 def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
5505 (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
5506 (zext (v2i32 DPR:$opB))),
5507 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
5508 (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
5511 // VABA : Vector Absolute Difference and Accumulate
5512 defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
5513 "vaba", "s", int_arm_neon_vabds, add>;
5514 defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
5515 "vaba", "u", int_arm_neon_vabdu, add>;
5517 // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
5518 defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
5519 "vabal", "s", int_arm_neon_vabds, zext, add>;
5520 defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
5521 "vabal", "u", int_arm_neon_vabdu, zext, add>;
5523 // Vector Maximum and Minimum.
5525 // VMAX : Vector Maximum
// Integer VMAX over QHS element sizes, selected directly from the
// generic smax/umax DAG nodes (signed vs. unsigned encoded in bit 24).
5526 defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
5527 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5528 "vmax", "s", smax, 1>;
5529 defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
5530 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5531 "vmax", "u", umax, 1>;
5532 def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
5534 v2f32, v2f32, fmaximum, 1>;
5535 def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5537 v4f32, v4f32, fmaximum, 1>;
5538 def VMAXhd : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
5540 v4f16, v4f16, fmaximum, 1>,
5541 Requires<[HasNEON, HasFullFP16]>;
5542 def VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5544 v8f16, v8f16, fmaximum, 1>,
5545 Requires<[HasNEON, HasFullFP16]>;
5548 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
5549 def NEON_VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
5550 N3RegFrm, NoItinerary, "vmaxnm", "f32",
5551 v2f32, v2f32, fmaxnum, 1>,
5552 Requires<[HasV8, HasNEON]>;
5553 def NEON_VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
5554 N3RegFrm, NoItinerary, "vmaxnm", "f32",
5555 v4f32, v4f32, fmaxnum, 1>,
5556 Requires<[HasV8, HasNEON]>;
5557 def NEON_VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
5558 N3RegFrm, NoItinerary, "vmaxnm", "f16",
5559 v4f16, v4f16, fmaxnum, 1>,
5560 Requires<[HasV8, HasNEON, HasFullFP16]>;
5561 def NEON_VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
5562 N3RegFrm, NoItinerary, "vmaxnm", "f16",
5563 v8f16, v8f16, fmaxnum, 1>,
5564 Requires<[HasV8, HasNEON, HasFullFP16]>;
5567 // VMIN : Vector Minimum
// Integer VMIN over QHS element sizes, selected from the generic
// smin/umin DAG nodes; mirrors the VMAX definitions with op4 = 1.
5568 defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
5569 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5570 "vmin", "s", smin, 1>;
5571 defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
5572 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5573 "vmin", "u", umin, 1>;
5574 def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
5576 v2f32, v2f32, fminimum, 1>;
5577 def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5579 v4f32, v4f32, fminimum, 1>;
5580 def VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
5582 v4f16, v4f16, fminimum, 1>,
5583 Requires<[HasNEON, HasFullFP16]>;
5584 def VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5586 v8f16, v8f16, fminimum, 1>,
5587 Requires<[HasNEON, HasFullFP16]>;
5590 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
5591 def NEON_VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
5592 N3RegFrm, NoItinerary, "vminnm", "f32",
5593 v2f32, v2f32, fminnum, 1>,
5594 Requires<[HasV8, HasNEON]>;
5595 def NEON_VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
5596 N3RegFrm, NoItinerary, "vminnm", "f32",
5597 v4f32, v4f32, fminnum, 1>,
5598 Requires<[HasV8, HasNEON]>;
5599 def NEON_VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
5600 N3RegFrm, NoItinerary, "vminnm", "f16",
5601 v4f16, v4f16, fminnum, 1>,
5602 Requires<[HasV8, HasNEON, HasFullFP16]>;
5603 def NEON_VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
5604 N3RegFrm, NoItinerary, "vminnm", "f16",
5605 v8f16, v8f16, fminnum, 1>,
5606 Requires<[HasV8, HasNEON, HasFullFP16]>;
5609 // Vector Pairwise Operations.
5611 // VPADD : Vector Pairwise Add
5612 def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
5614 v8i8, v8i8, int_arm_neon_vpadd, 0>;
5615 def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
5617 v4i16, v4i16, int_arm_neon_vpadd, 0>;
5618 def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
5620 v2i32, v2i32, int_arm_neon_vpadd, 0>;
5621 def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
5622 IIC_VPBIND, "vpadd", "f32",
5623 v2f32, v2f32, int_arm_neon_vpadd, 0>;
5624 def VPADDh : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
5625 IIC_VPBIND, "vpadd", "f16",
5626 v4f16, v4f16, int_arm_neon_vpadd, 0>,
5627 Requires<[HasNEON, HasFullFP16]>;
5629 // VPADDL : Vector Pairwise Add Long
5630 defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
5631 int_arm_neon_vpaddls>;
5632 defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
5633 int_arm_neon_vpaddlu>;
5635 // VPADAL : Vector Pairwise Add and Accumulate Long
5636 defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
5637 int_arm_neon_vpadals>;
5638 defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
5639 int_arm_neon_vpadalu>;
5641 // VPMAX : Vector Pairwise Maximum
5642 def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5643 "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
5644 def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5645 "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
5646 def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5647 "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
5648 def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5649 "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
5650 def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5651 "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
5652 def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5653 "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
5654 def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
5655 "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
5656 def VPMAXh : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
5657 "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
5658 Requires<[HasNEON, HasFullFP16]>;
5660 // VPMIN : Vector Pairwise Minimum
5661 def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5662 "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
5663 def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5664 "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
5665 def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5666 "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
5667 def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5668 "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
5669 def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5670 "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
5671 def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5672 "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
5673 def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
5674 "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
5675 def VPMINh : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
5676 "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
5677 Requires<[HasNEON, HasFullFP16]>;
5679 // Vector Reciprocal and Reciprocal Square Root Estimate and Step.
5681 // VRECPE : Vector Reciprocal Estimate
5682 def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
5683 IIC_VUNAD, "vrecpe", "u32",
5684 v2i32, v2i32, int_arm_neon_vrecpe>;
5685 def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
5686 IIC_VUNAQ, "vrecpe", "u32",
5687 v4i32, v4i32, int_arm_neon_vrecpe>;
5688 def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
5689 IIC_VUNAD, "vrecpe", "f32",
5690 v2f32, v2f32, int_arm_neon_vrecpe>;
5691 def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
5692 IIC_VUNAQ, "vrecpe", "f32",
5693 v4f32, v4f32, int_arm_neon_vrecpe>;
5694 def VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
5695 IIC_VUNAD, "vrecpe", "f16",
5696 v4f16, v4f16, int_arm_neon_vrecpe>,
5697 Requires<[HasNEON, HasFullFP16]>;
5698 def VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
5699 IIC_VUNAQ, "vrecpe", "f16",
5700 v8f16, v8f16, int_arm_neon_vrecpe>,
5701 Requires<[HasNEON, HasFullFP16]>;
5703 // VRECPS : Vector Reciprocal Step
5704 def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
5705 IIC_VRECSD, "vrecps", "f32",
5706 v2f32, v2f32, int_arm_neon_vrecps, 1>;
5707 def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
5708 IIC_VRECSQ, "vrecps", "f32",
5709 v4f32, v4f32, int_arm_neon_vrecps, 1>;
5710 def VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
5711 IIC_VRECSD, "vrecps", "f16",
5712 v4f16, v4f16, int_arm_neon_vrecps, 1>,
5713 Requires<[HasNEON, HasFullFP16]>;
5714 def VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
5715 IIC_VRECSQ, "vrecps", "f16",
5716 v8f16, v8f16, int_arm_neon_vrecps, 1>,
5717 Requires<[HasNEON, HasFullFP16]>;
5719 // VRSQRTE : Vector Reciprocal Square Root Estimate
5720 def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
5721 IIC_VUNAD, "vrsqrte", "u32",
5722 v2i32, v2i32, int_arm_neon_vrsqrte>;
5723 def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
5724 IIC_VUNAQ, "vrsqrte", "u32",
5725 v4i32, v4i32, int_arm_neon_vrsqrte>;
5726 def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
5727 IIC_VUNAD, "vrsqrte", "f32",
5728 v2f32, v2f32, int_arm_neon_vrsqrte>;
5729 def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
5730 IIC_VUNAQ, "vrsqrte", "f32",
5731 v4f32, v4f32, int_arm_neon_vrsqrte>;
5732 def VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
5733 IIC_VUNAD, "vrsqrte", "f16",
5734 v4f16, v4f16, int_arm_neon_vrsqrte>,
5735 Requires<[HasNEON, HasFullFP16]>;
5736 def VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
5737 IIC_VUNAQ, "vrsqrte", "f16",
5738 v8f16, v8f16, int_arm_neon_vrsqrte>,
5739 Requires<[HasNEON, HasFullFP16]>;
5741 // VRSQRTS : Vector Reciprocal Square Root Step
5742 def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
5743 IIC_VRECSD, "vrsqrts", "f32",
5744 v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
5745 def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
5746 IIC_VRECSQ, "vrsqrts", "f32",
5747 v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
5748 def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
5749 IIC_VRECSD, "vrsqrts", "f16",
5750 v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
5751 Requires<[HasNEON, HasFullFP16]>;
5752 def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
5753 IIC_VRECSQ, "vrsqrts", "f16",
5754 v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
5755 Requires<[HasNEON, HasFullFP16]>;
5759 // VSHL : Vector Shift
5760 defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
5761 IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
5762 "vshl", "s", int_arm_neon_vshifts>;
5763 defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
5764 IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
5765 "vshl", "u", int_arm_neon_vshiftu>;
5767 let Predicates = [HasNEON] in {
5768 def : Pat<(v8i8 (ARMvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
5769 (VSHLsv8i8 DPR:$Dn, DPR:$Dm)>;
5770 def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
5771 (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
5772 def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
5773 (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
5774 def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
5775 (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;
5776 def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
5777 (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
5778 def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
5779 (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
5780 def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
5781 (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
5782 def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
5783 (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;
5785 def : Pat<(v8i8 (ARMvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
5786 (VSHLuv8i8 DPR:$Dn, DPR:$Dm)>;
5787 def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
5788 (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
5789 def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
5790 (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
5791 def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
5792 (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;
5793 def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
5794 (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
5795 def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
5796 (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
5797 def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
5798 (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
5799 def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
5800 (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;
5804 // VSHL : Vector Shift Left (Immediate)
5805 defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>;
5807 // VSHR : Vector Shift Right (Immediate)
5808 defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
5810 defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
5813 // VSHLL : Vector Shift Left Long
5814 defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
5815 PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (sext node:$LHS), node:$RHS)>>;
5816 defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
5817 PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (zext node:$LHS), node:$RHS)>>;
5819 // VSHLL : Vector Shift Left Long (with maximum shift count)
5820 class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
5821 bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
5822 ValueType OpTy, Operand ImmTy>
5823 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
5824 ResTy, OpTy, ImmTy, null_frag> {
5825 let Inst{21-16} = op21_16;
5826 let DecoderMethod = "DecodeVSHLMaxInstruction";
5828 def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
5830 def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
5831 v4i32, v4i16, imm16>;
5832 def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
5833 v2i64, v2i32, imm32>;
5835 let Predicates = [HasNEON] in {
5836 def : Pat<(v8i16 (ARMvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
5837 (VSHLLi8 DPR:$Rn, 8)>;
5838 def : Pat<(v4i32 (ARMvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
5839 (VSHLLi16 DPR:$Rn, 16)>;
5840 def : Pat<(v2i64 (ARMvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
5841 (VSHLLi32 DPR:$Rn, 32)>;
5842 def : Pat<(v8i16 (ARMvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
5843 (VSHLLi8 DPR:$Rn, 8)>;
5844 def : Pat<(v4i32 (ARMvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
5845 (VSHLLi16 DPR:$Rn, 16)>;
5846 def : Pat<(v2i64 (ARMvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
5847 (VSHLLi32 DPR:$Rn, 32)>;
5848 def : Pat<(v8i16 (ARMvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
5849 (VSHLLi8 DPR:$Rn, 8)>;
5850 def : Pat<(v4i32 (ARMvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
5851 (VSHLLi16 DPR:$Rn, 16)>;
5852 def : Pat<(v2i64 (ARMvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
5853 (VSHLLi32 DPR:$Rn, 32)>;
5856 // VSHRN : Vector Shift Right and Narrow
5857 defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
5858 PatFrag<(ops node:$Rn, node:$amt),
5859 (trunc (ARMvshrsImm node:$Rn, node:$amt))>>;
5861 let Predicates = [HasNEON] in {
5862 def : Pat<(v8i8 (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
5863 (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
5864 def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
5865 (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
5866 def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
5867 (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
5870 // VRSHL : Vector Rounding Shift
5871 defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
5872 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5873 "vrshl", "s", int_arm_neon_vrshifts>;
5874 defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
5875 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5876 "vrshl", "u", int_arm_neon_vrshiftu>;
5877 // VRSHR : Vector Rounding Shift Right
5878 defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
5880 defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
5883 // VRSHRN : Vector Rounding Shift Right and Narrow
5884 defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
5887 // VQSHL : Vector Saturating Shift
5888 defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
5889 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5890 "vqshl", "s", int_arm_neon_vqshifts>;
5891 defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
5892 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5893 "vqshl", "u", int_arm_neon_vqshiftu>;
5894 // VQSHL : Vector Saturating Shift Left (Immediate)
5895 defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshlsImm>;
5896 defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshluImm>;
5898 // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
5899 defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsuImm>;
5901 // VQSHRN : Vector Saturating Shift Right and Narrow
5902 defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
5904 defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
5907 // VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
5908 defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
5911 // VQRSHL : Vector Saturating Rounding Shift
5912 defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
5913 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5914 "vqrshl", "s", int_arm_neon_vqrshifts>;
5915 defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
5916 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5917 "vqrshl", "u", int_arm_neon_vqrshiftu>;
5919 // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
5920 defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
5922 defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
5925 // VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
5926 defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
5929 // VSRA : Vector Shift Right and Accumulate
// Shift-right-and-accumulate: built from the (signed/unsigned) immediate
// shift nodes ARMvshrsImm/ARMvshruImm via the N2VShAdd_QHSD multiclass.
5930 defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>;
5931 defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>;
5932 // VRSRA : Vector Rounding Shift Right and Accumulate
// Rounding variants use the NEON-specific rounding-shift nodes.
5933 defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>;
5934 defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>;
5936 // VSLI : Vector Shift Left and Insert
5937 defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
5939 // VSRI : Vector Shift Right and Insert
5940 defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
5942 // Vector Absolute and Saturating Absolute.
5944 // VABS : Vector Absolute Value
5945 defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
5946 IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>;
5947 def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
5949 v2f32, v2f32, fabs>;
5950 def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
5952 v4f32, v4f32, fabs>;
5953 def VABShd : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
5955 v4f16, v4f16, fabs>,
5956 Requires<[HasNEON, HasFullFP16]>;
5957 def VABShq : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
5959 v8f16, v8f16, fabs>,
5960 Requires<[HasNEON, HasFullFP16]>;
5962 // VQABS : Vector Saturating Absolute Value
5963 defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
5964 IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
5965 int_arm_neon_vqabs>;
// Pattern fragments expressing integer negation as (0 - x); the all-zeros
// immediate is bitconverted so the fragment matches any element type.
5969 def vnegd : PatFrag<(ops node:$in),
5970 (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
5971 def vnegq : PatFrag<(ops node:$in),
5972 (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
// Helper classes: one VNEG record per element size, D and Q register forms,
// matching the vnegd/vnegq fragments above.
5974 class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
5975 : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
5976 IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
5977 [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
5978 class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
5979 : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
5980 IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
5981 [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
5983 // VNEG : Vector Negate (integer)
// size field selects the element width: 0b00 = 8-bit, 0b01 = 16-bit, 0b10 = 32-bit.
5984 def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>;
5985 def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
5986 def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
5987 def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>;
5988 def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
5989 def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
5991 // VNEG : Vector Negate (floating-point)
5992 def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
5993 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
5994 "vneg", "f32", "$Vd, $Vm", "",
5995 [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
5996 def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
5997 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
5998 "vneg", "f32", "$Vd, $Vm", "",
5999 [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
6000 def VNEGhd : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
6001 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
6002 "vneg", "f16", "$Vd, $Vm", "",
6003 [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
6004 Requires<[HasNEON, HasFullFP16]>;
6005 def VNEGhq : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
6006 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
6007 "vneg", "f16", "$Vd, $Vm", "",
6008 [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
6009 Requires<[HasNEON, HasFullFP16]>;
6011 let Predicates = [HasNEON] in {
6012 def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
6013 def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
6014 def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
6015 def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
6016 def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
6017 def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
6020 // VQNEG : Vector Saturating Negate
6021 defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
6022 IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
6023 int_arm_neon_vqneg>;
6025 // Vector Bit Counting Operations.
6027 // VCLS : Vector Count Leading Sign Bits
6028 defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
6029 IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
6031 // VCLZ : Vector Count Leading Zeros
6032 defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
6033 IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
6035 // VCNT : Vector Count One Bits
6036 def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
6037 IIC_VCNTiD, "vcnt", "8",
6039 def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
6040 IIC_VCNTiQ, "vcnt", "8",
6041 v16i8, v16i8, ctpop>;
6044 def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
6045 (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
6046 NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
6048 def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
6049 (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
6050 NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
6053 // Vector Move Operations.
6055 // VMOV : Vector Move (Register)
6056 def : NEONInstAlias<"vmov${p} $Vd, $Vm",
6057 (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
6058 def : NEONInstAlias<"vmov${p} $Vd, $Vm",
6059 (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
6061 // VMOV : Vector Move (Immediate)
6063 // Although VMOVs are not strictly speaking cheap, they are as expensive
6064 // as their copies counterpart (VORR), so we should prefer rematerialization
6065 // over splitting when it applies.
6066 let isReMaterializable = 1, isAsCheapAsAMove=1 in {
// VMOV (immediate), i8 splat forms: materialize a replicated 8-bit modified
// immediate, selected from the ARMvmovImm node with an nImmSplatI8 operand.
6067 def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
6068 (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
6069 "vmov", "i8", "$Vd, $SIMM", "",
6070 [(set DPR:$Vd, (v8i8 (ARMvmovImm timm:$SIMM)))]>;
// 128-bit Q form of the same i8 splat immediate move.
6071 def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
6072 (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
6073 "vmov", "i8", "$Vd, $SIMM", "",
6074 [(set QPR:$Vd, (v16i8 (ARMvmovImm timm:$SIMM)))]>;
6076 def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
6077 (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
6078 "vmov", "i16", "$Vd, $SIMM", "",
6079 [(set DPR:$Vd, (v4i16 (ARMvmovImm timm:$SIMM)))]> {
6080 let Inst{9} = SIMM{9};
6083 def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
6084 (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
6085 "vmov", "i16", "$Vd, $SIMM", "",
6086 [(set QPR:$Vd, (v8i16 (ARMvmovImm timm:$SIMM)))]> {
6087 let Inst{9} = SIMM{9};
6090 def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
6091 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
6092 "vmov", "i32", "$Vd, $SIMM", "",
6093 [(set DPR:$Vd, (v2i32 (ARMvmovImm timm:$SIMM)))]> {
6094 let Inst{11-8} = SIMM{11-8};
6097 def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
6098 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
6099 "vmov", "i32", "$Vd, $SIMM", "",
6100 [(set QPR:$Vd, (v4i32 (ARMvmovImm timm:$SIMM)))]> {
6101 let Inst{11-8} = SIMM{11-8};
6104 def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
6105 (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
6106 "vmov", "i64", "$Vd, $SIMM", "",
6107 [(set DPR:$Vd, (v1i64 (ARMvmovImm timm:$SIMM)))]>;
6108 def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
6109 (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
6110 "vmov", "i64", "$Vd, $SIMM", "",
6111 [(set QPR:$Vd, (v2i64 (ARMvmovImm timm:$SIMM)))]>;
6113 def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
6114 (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
6115 "vmov", "f32", "$Vd, $SIMM", "",
6116 [(set DPR:$Vd, (v2f32 (ARMvmovFPImm timm:$SIMM)))]>;
6117 def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
6118 (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
6119 "vmov", "f32", "$Vd, $SIMM", "",
6120 [(set QPR:$Vd, (v4f32 (ARMvmovFPImm timm:$SIMM)))]>;
6121 } // isReMaterializable, isAsCheapAsAMove
// Assembler-only aliases: an immediate whose bytes (or wider elements) are a
// replication of a narrower pattern is accepted at the wide element size and
// re-emitted at the narrow one, for GAS compatibility.
6123 // Add support for bytes replication feature, so it could be GAS compatible.
6124 multiclass NEONImmReplicateI8InstAlias<ValueType To> {
6125 // E.g. instructions below:
6126 // "vmov.i32 d0, #0xffffffff"
6127 // "vmov.i32 d0, #0xabababab"
6128 // "vmov.i16 d0, #0xabab"
6129 // are incorrect, but we could deal with such cases.
6130 // For last two instructions, for example, it should emit:
6131 // "vmov.i8 d0, #0xab"
6132 def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6133 (VMOVv8i8 DPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
6134 def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6135 (VMOVv16i8 QPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
6136 // Also add same support for VMVN instructions. So instruction:
6137 // "vmvn.i32 d0, #0xabababab"
// is canonicalized to a VMOV of the bitwise complement of the byte:
6139 // "vmov.i8 d0, #0x54"
6140 def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6141 (VMOVv8i8 DPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
6142 def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6143 (VMOVv16i8 QPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
6146 defm : NEONImmReplicateI8InstAlias<i16>;
6147 defm : NEONImmReplicateI8InstAlias<i32>;
6148 defm : NEONImmReplicateI8InstAlias<i64>;
6150 // Similar to above for types other than i8, e.g.:
6151 // "vmov.i32 d0, #0xab00ab00" -> "vmov.i16 d0, #0xab00"
6152 // "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000"
6153 // In this case we do not canonicalize VMVN to VMOV
// V8/V16 are the D/Q VMOV instructions to emit; NV8/NV16 the VMVN ones.
6154 multiclass NEONImmReplicateInstAlias<ValueType From, NeonI V8, NeonI V16,
6155 NeonI NV8, NeonI NV16, ValueType To> {
6156 def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6157 (V8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6158 def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6159 (V16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6160 def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6161 (NV8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6162 def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6163 (NV16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6166 defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
6167 VMVNv4i16, VMVNv8i16, i32>;
6168 defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
6169 VMVNv4i16, VMVNv8i16, i64>;
6170 defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32,
6171 VMVNv2i32, VMVNv4i32, i64>;
6172 // TODO: add "VMOV <-> VMVN" conversion for cases like
6173 // "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55"
6174 // "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0xff00"
6176 // On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
6177 // require zero cycles to execute so they should be used wherever possible for
6178 // setting a register to zero.
6180 // Even without these pseudo-insts we would probably end up with the correct
6181 // instruction, but we could not mark the general ones with "isAsCheapAsAMove"
6182 // since they are sometimes rather expensive (in general).
// Pseudos matching the all-zeros vector; each expands to the corresponding
// VMOV-immediate with encoded immediate 0 and an AL predicate (14, zero_reg).
// NOTE(review): the trailing Requires<...>/closers of both defs are missing
// from this extract.
6184 let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
6185 def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
6186 [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))],
6187 (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
6189 def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
6190 [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))],
6191 (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
6195 // VMOV : Vector Get Lane (move scalar to ARM core register)
// s8/s16 are the sign-extending variants (ARMvgetlanes), u8/u16 the
// zero-extending ones (ARMvgetlaneu); the lane index is scattered into
// Inst{21} and Inst{6-5}/Inst{6} as shown by the "let Inst" assignments.
// NOTE(review): the tail of each pattern (lane operand, closing "]>") and
// several closing "}" lines are missing from this extract.
6197 def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
6198 (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
6199 IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
6200 [(set GPR:$R, (ARMvgetlanes (v8i8 DPR:$V),
6202 let Inst{21} = lane{2};
6203 let Inst{6-5} = lane{1-0};
6205 def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
6206 (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
6207 IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
6208 [(set GPR:$R, (ARMvgetlanes (v4i16 DPR:$V),
6210 let Inst{21} = lane{1};
6211 let Inst{6} = lane{0};
6213 def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
6214 (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
6215 IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
6216 [(set GPR:$R, (ARMvgetlaneu (v8i8 DPR:$V),
6218 let Inst{21} = lane{2};
6219 let Inst{6-5} = lane{1-0};
6221 def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
6222 (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
6223 IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
6224 [(set GPR:$R, (ARMvgetlaneu (v4i16 DPR:$V),
6226 let Inst{21} = lane{1};
6227 let Inst{6} = lane{0};
// 32-bit get-lane needs no extension; only available when VGETLN.32 is fast
// (HasFastVGETLNi32), otherwise the VMOVRS-style patterns below are used.
6229 def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
6230 (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
6231 IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
6232 [(set GPR:$R, (extractelt (v2i32 DPR:$V),
6234 Requires<[HasFPRegs, HasFastVGETLNi32]> {
6235 let Inst{21} = lane{0};
6237 let Predicates = [HasNEON] in {
6238 // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
// Q-register lane reads: extract the containing D subregister
// (DSubReg_*_reg) then index within it (SubReg_*_lane).
6239 def : Pat<(ARMvgetlanes (v16i8 QPR:$src), imm:$lane),
6240 (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
6241 (DSubReg_i8_reg imm:$lane))),
6242 (SubReg_i8_lane imm:$lane))>;
6243 def : Pat<(ARMvgetlanes (v8i16 QPR:$src), imm:$lane),
6244 (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
6245 (DSubReg_i16_reg imm:$lane))),
6246 (SubReg_i16_lane imm:$lane))>;
6247 def : Pat<(ARMvgetlaneu (v16i8 QPR:$src), imm:$lane),
6248 (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
6249 (DSubReg_i8_reg imm:$lane))),
6250 (SubReg_i8_lane imm:$lane))>;
6251 def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane),
6252 (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
6253 (DSubReg_i16_reg imm:$lane))),
6254 (SubReg_i16_lane imm:$lane))>;
6256 def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
6257 (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
6258 (DSubReg_i32_reg imm:$lane))),
6259 (SubReg_i32_lane imm:$lane))>,
6260 Requires<[HasNEON, HasFastVGETLNi32]>;
// Slow-VGETLN.32 targets: read the lane through an S subregister instead.
// NOTE(review): the COPY_TO_REGCLASS line of these two patterns is missing.
6261 def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
6263 (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
6264 Requires<[HasNEON, HasSlowVGETLNi32]>;
6265 def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
6267 (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
6268 Requires<[HasNEON, HasSlowVGETLNi32]>;
6269 let Predicates = [HasNEON] in {
// f32 lane reads are free: just an S-subregister extract after constraining
// the source to a VFP-addressable register class.
6270 def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
6271 (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
6272 (SSubReg_f32_reg imm:$src2))>;
6273 def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
6274 (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
6275 (SSubReg_f32_reg imm:$src2))>;
6276 //def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
6277 // (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
6278 def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
6279 (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
// Leaf predicates classifying an i32 immediate lane index by parity. They
// select between the even-lane (direct S-subreg) and odd-lane (VMOVH) f16
// extract patterns that follow.
def imm_even : ImmLeaf<i32, [{ return (Imm & 1) == 0; }]>;
def imm_odd  : ImmLeaf<i32, [{ return (Imm & 1) != 0; }]>;
// f16 lane extraction: each pair of f16 lanes shares one S register, so an
// even lane is a plain S-subregister extract, while an odd lane additionally
// moves the high half down with VMOVH.
// NOTE(review): the opening "(EXTRACT_SUBREG" line of the even-lane patterns
// and the closing lines of the odd-lane patterns are missing from this
// extract.
6285 let Predicates = [HasNEON] in {
6286 def : Pat<(extractelt (v4f16 DPR:$src), imm_even:$lane),
6288 (v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)),
6289 (SSubReg_f16_reg imm_even:$lane))>;
6291 def : Pat<(extractelt (v4f16 DPR:$src), imm_odd:$lane),
6293 (VMOVH (EXTRACT_SUBREG
6294 (v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)),
6295 (SSubReg_f16_reg imm_odd:$lane))),
6298 def : Pat<(extractelt (v8f16 QPR:$src), imm_even:$lane),
6300 (v4f32 (COPY_TO_REGCLASS (v8f16 QPR:$src), QPR_VFP2)),
6301 (SSubReg_f16_reg imm_even:$lane))>;
6303 def : Pat<(extractelt (v8f16 QPR:$src), imm_odd:$lane),
6305 (VMOVH (EXTRACT_SUBREG
6306 (v4f32 (COPY_TO_REGCLASS (v8f16 QPR:$src), QPR_VFP2)),
6307 (SSubReg_f16_reg imm_odd:$lane))),
6311 // VMOV : Vector Set Lane (move ARM core register to scalar)
// The "$src1 = $V" constraint makes every set-lane a read-modify-write of
// the destination register; only the addressed lane changes.
6313 let Constraints = "$src1 = $V" in {
6314 def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
6315 (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
6316 IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
6317 [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
6318 GPR:$R, imm:$lane))]> {
6319 let Inst{21} = lane{2};
6320 let Inst{6-5} = lane{1-0};
6322 def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
6323 (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
6324 IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
6325 [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
6326 GPR:$R, imm:$lane))]> {
6327 let Inst{21} = lane{1};
6328 let Inst{6} = lane{0};
// The 32-bit variant only needs VFP2 (not NEON) and is flagged as an
// insert-subreg so the register allocator can fold it.
6330 def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
6331 (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
6332 IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
6333 [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
6334 GPR:$R, imm:$lane))]>,
6335 Requires<[HasVFP2]> {
6336 let Inst{21} = lane{0};
6337 // This instruction is equivalent as
6338 // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
6339 let isInsertSubreg = 1;
// Q-register lane writes: set the lane in the containing D subregister,
// then re-insert that D register into the Q value.
6343 let Predicates = [HasNEON] in {
6344 def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
6345 (v16i8 (INSERT_SUBREG QPR:$src1,
6346 (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
6347 (DSubReg_i8_reg imm:$lane))),
6348 GPR:$src2, (SubReg_i8_lane imm:$lane))),
6349 (DSubReg_i8_reg imm:$lane)))>;
6350 def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
6351 (v8i16 (INSERT_SUBREG QPR:$src1,
6352 (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
6353 (DSubReg_i16_reg imm:$lane))),
6354 GPR:$src2, (SubReg_i16_lane imm:$lane))),
6355 (DSubReg_i16_reg imm:$lane)))>;
6356 def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
6357 (v4i32 (INSERT_SUBREG QPR:$src1,
6358 (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
6359 (DSubReg_i32_reg imm:$lane))),
6360 GPR:$src2, (SubReg_i32_lane imm:$lane))),
6361 (DSubReg_i32_reg imm:$lane)))>;
// f32 lane writes are a plain S-subregister insert.
6363 def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
6364 (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
6365 SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
6366 def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
6367 (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
6368 SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
// f16 lane writes move the half value to a GPR first (VMOVRH) and reuse the
// 16-bit integer set-lane.
6370 def : Pat<(insertelt (v4f16 DPR:$src1), HPR:$src2, imm:$lane),
6371 (v4f16 (VSETLNi16 DPR:$src1, (VMOVRH $src2), imm:$lane))>;
6372 def : Pat<(insertelt (v8f16 QPR:$src1), HPR:$src2, imm:$lane),
6373 (v8f16 (INSERT_SUBREG QPR:$src1,
6374 (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
6375 (DSubReg_i16_reg imm:$lane))),
6376 (VMOVRH $src2), (SubReg_i16_lane imm:$lane))),
6377 (DSubReg_i16_reg imm:$lane)))>;
6379 //def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
6380 // (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
6381 def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
6382 (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
// scalar_to_vector: place the scalar in lane 0 of an undef vector; the
// remaining lanes are IMPLICIT_DEF.
6384 def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
6385 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
6386 def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
6387 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
6388 def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
6389 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
6391 def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
6392 (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
6393 def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
6394 (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
6395 def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
6396 (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
// NOTE(review): the trailing subreg-index line (dsub_0) of the three
// Q-register patterns below is missing from this extract.
6398 def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
6399 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6400 (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
6402 def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
6403 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
6404 (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
6406 def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
6407 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
6408 (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
6412 // VDUP : Vector Duplicate (from ARM core register to all elements)
6414 class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
6415 : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
6416 IIC_VMOVIS, "vdup", Dt, "$V, $R",
6417 [(set DPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
6418 class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
6419 : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
6420 IIC_VMOVIS, "vdup", Dt, "$V, $R",
6421 [(set QPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
6423 def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
6424 def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
// D-register VDUP.32 is gated on HasFastVDUP32; slow targets use the
// VMOVDRR patterns below instead.
6425 def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>,
6426 Requires<[HasNEON, HasFastVDUP32]>;
6427 def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
6428 def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
6429 def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
6431 // ARMvdup patterns for uarchs with fast VDUP.32.
6432 def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
6433 Requires<[HasNEON,HasFastVDUP32]>;
6434 def : Pat<(v4f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>,
6435 Requires<[HasNEON]>;
6437 // ARMvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
6438 def : Pat<(v2i32 (ARMvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
6439 Requires<[HasNEON,HasSlowVDUP32]>;
6440 def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
6441 Requires<[HasNEON,HasSlowVDUP32]>;
6443 // VDUP : Vector Duplicate Lane (from scalar to all elements)
6445 class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
6446 ValueType Ty, Operand IdxTy>
6447 : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
6448 IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
6449 [(set DPR:$Vd, (Ty (ARMvduplane (Ty DPR:$Vm), imm:$lane)))]>;
6451 class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
6452 ValueType ResTy, ValueType OpTy, Operand IdxTy>
6453 : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
6454 IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
6455 [(set QPR:$Vd, (ResTy (ARMvduplane (OpTy DPR:$Vm),
6456 VectorIndex32:$lane)))]>;
6458 // Inst{19-16} is partially specified depending on the element size.
// The element size fixes the low bits of Inst{19-16}; the remaining '?'
// bits carry the lane index, as wired by the "let Inst{...}" assignments.
// NOTE(review): closing "}" lines of the defs below are missing from this
// extract.
6460 def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
6462 let Inst{19-17} = lane{2-0};
6464 def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
6466 let Inst{19-18} = lane{1-0};
6468 def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
6470 let Inst{19} = lane{0};
6472 def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
6474 let Inst{19-17} = lane{2-0};
6476 def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
6478 let Inst{19-18} = lane{1-0};
6480 def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
6482 let Inst{19} = lane{0};
6485 let Predicates = [HasNEON] in {
// f16 lane-dup reuses the 32-bit encoding (two f16 lanes per 32-bit unit).
6486 def : Pat<(v4f16 (ARMvduplane (v4f16 DPR:$Vm), imm:$lane)),
6487 (VDUPLN32d DPR:$Vm, imm:$lane)>;
6489 def : Pat<(v2f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
6490 (VDUPLN32d DPR:$Vm, imm:$lane)>;
6492 def : Pat<(v4f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
6493 (VDUPLN32q DPR:$Vm, imm:$lane)>;
// Duplicating a Q-register lane: extract the D subregister holding it,
// then lane-dup from that D register.
6495 def : Pat<(v16i8 (ARMvduplane (v16i8 QPR:$src), imm:$lane)),
6496 (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
6497 (DSubReg_i8_reg imm:$lane))),
6498 (SubReg_i8_lane imm:$lane)))>;
6499 def : Pat<(v8i16 (ARMvduplane (v8i16 QPR:$src), imm:$lane)),
6500 (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
6501 (DSubReg_i16_reg imm:$lane))),
6502 (SubReg_i16_lane imm:$lane)))>;
6503 def : Pat<(v8f16 (ARMvduplane (v8f16 QPR:$src), imm:$lane)),
6504 (v8f16 (VDUPLN16q (v4f16 (EXTRACT_SUBREG QPR:$src,
6505 (DSubReg_i16_reg imm:$lane))),
6506 (SubReg_i16_lane imm:$lane)))>;
6507 def : Pat<(v4i32 (ARMvduplane (v4i32 QPR:$src), imm:$lane)),
6508 (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
6509 (DSubReg_i32_reg imm:$lane))),
6510 (SubReg_i32_lane imm:$lane)))>;
6511 def : Pat<(v4f32 (ARMvduplane (v4f32 QPR:$src), imm:$lane)),
6512 (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
6513 (DSubReg_i32_reg imm:$lane))),
6514 (SubReg_i32_lane imm:$lane)))>;
// Duplicating an FP scalar: insert it into lane 0 of an undef D register
// (ssub_0) and lane-dup lane 0.
6516 def : Pat<(v4f16 (ARMvdup HPR:$src)),
6517 (v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
6518 HPR:$src, ssub_0), (i32 0)))>;
6519 def : Pat<(v2f32 (ARMvdup (f32 SPR:$src))),
6520 (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
6521 SPR:$src, ssub_0), (i32 0)))>;
6522 def : Pat<(v4f32 (ARMvdup (f32 SPR:$src))),
6523 (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
6524 SPR:$src, ssub_0), (i32 0)))>;
6525 def : Pat<(v8f16 (ARMvdup HPR:$src)),
6526 (v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
6527 HPR:$src, ssub_0), (i32 0)))>;
6530 // VMOVN : Vector Narrowing Move
6531 defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
6532 "vmovn", "i", trunc>;
6533 // VQMOVN : Vector Saturating Narrowing Move
// Three saturating variants: signed->signed, unsigned->unsigned, and
// signed->unsigned ("vqmovun"), each mapped to its NEON intrinsic.
6534 defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
6535 "vqmovn", "s", int_arm_neon_vqmovns>;
6536 defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
6537 "vqmovn", "u", int_arm_neon_vqmovnu>;
6538 defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
6539 "vqmovun", "s", int_arm_neon_vqmovnsu>;
6540 // VMOVL : Vector Lengthening Move
6541 defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
6542 defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
6544 let Predicates = [HasNEON] in {
// anyext has no preferred extension, so reuse the zero-extending VMOVL.
6545 def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
6546 def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
6547 def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
6550 // Vector Conversions.
6552 // VCVT : Vector Convert Between Floating-Point and Integers
// Naming: f2s/f2u truncate toward zero (fp_to_sint/fp_to_uint); s2f/u2f are
// the integer-to-float directions. "d" suffix = D register, "q" = Q register.
6553 def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
6554 v2i32, v2f32, fp_to_sint>;
6555 def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
6556 v2i32, v2f32, fp_to_uint>;
6557 def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
6558 v2f32, v2i32, sint_to_fp>;
6559 def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
6560 v2f32, v2i32, uint_to_fp>;
6562 def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
6563 v4i32, v4f32, fp_to_sint>;
6564 def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
6565 v4i32, v4f32, fp_to_uint>;
6566 def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
6567 v4f32, v4i32, sint_to_fp>;
6568 def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
6569 v4f32, v4i32, uint_to_fp>;
// Half-precision element conversions require the full FP16 extension.
6571 def VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
6572 v4i16, v4f16, fp_to_sint>,
6573 Requires<[HasNEON, HasFullFP16]>;
6574 def VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
6575 v4i16, v4f16, fp_to_uint>,
6576 Requires<[HasNEON, HasFullFP16]>;
6577 def VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
6578 v4f16, v4i16, sint_to_fp>,
6579 Requires<[HasNEON, HasFullFP16]>;
6580 def VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
6581 v4f16, v4i16, uint_to_fp>,
6582 Requires<[HasNEON, HasFullFP16]>;
6584 def VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
6585 v8i16, v8f16, fp_to_sint>,
6586 Requires<[HasNEON, HasFullFP16]>;
6587 def VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
6588 v8i16, v8f16, fp_to_uint>,
6589 Requires<[HasNEON, HasFullFP16]>;
6590 def VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
6591 v8f16, v8i16, sint_to_fp>,
6592 Requires<[HasNEON, HasFullFP16]>;
6593 def VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
6594 v8f16, v8i16, uint_to_fp>,
6595 Requires<[HasNEON, HasFullFP16]>;
// VCVT{A,N,P,M}: ARMv8-only directed-rounding conversions. One multiclass
// instantiation per rounding mode; IntS/IntU are the signed/unsigned
// intrinsics for that mode. The v8 post-encoder/decoder namespace handles
// the Thumb2 encoding of these unpredicated instructions.
6598 multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
6599 SDPatternOperator IntU> {
6600 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
6601 def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6602 "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
6603 def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6604 "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
6605 def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6606 "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
6607 def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6608 "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
6609 def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6610 "s16.f16", v4i16, v4f16, IntS>,
6611 Requires<[HasV8, HasNEON, HasFullFP16]>;
6612 def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6613 "s16.f16", v8i16, v8f16, IntS>,
6614 Requires<[HasV8, HasNEON, HasFullFP16]>;
6615 def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6616 "u16.f16", v4i16, v4f16, IntU>,
6617 Requires<[HasV8, HasNEON, HasFullFP16]>;
6618 def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6619 "u16.f16", v8i16, v8f16, IntU>,
6620 Requires<[HasV8, HasNEON, HasFullFP16]>;
6624 defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
6625 defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
6626 defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
6627 defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;
6629 // VCVT : Vector Convert Between Floating-Point and Fixed-Point.
// These take an extra fractional-bits immediate; custom decoders validate
// the encoded #fbits field.
6630 let DecoderMethod = "DecodeVCVTD" in {
6631 def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
6632 v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
6633 def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
6634 v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
6635 def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
6636 v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
6637 def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
6638 v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
6639 let Predicates = [HasNEON, HasFullFP16] in {
6640 def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
6641 v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
6642 def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
6643 v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
6644 def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
6645 v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
6646 def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
6647 v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
6648 } // Predicates = [HasNEON, HasFullFP16]
6651 let DecoderMethod = "DecodeVCVTQ" in {
6652 def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
6653 v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
6654 def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
6655 v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
6656 def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
6657 v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
6658 def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
6659 v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
6660 let Predicates = [HasNEON, HasFullFP16] in {
6661 def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
6662 v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
6663 def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
6664 v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
6665 def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
6666 v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
6667 def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
6668 v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
6669 } // Predicates = [HasNEON, HasFullFP16]
// Assembler aliases: a fixed-point conversion written with "#0" fractional
// bits is accepted and mapped to the plain integer-conversion instruction.
6672 def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
6673 (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
6674 def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
6675 (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
6676 def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
6677 (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
6678 def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
6679 (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
6681 def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
6682 (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
6683 def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
6684 (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
6685 def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
6686 (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
6687 def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
6688 (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
6690 def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
6691 (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
6692 def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
6693 (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
6694 def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
6695 (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
6696 def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
6697 (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
6699 def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
6700 (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
6701 def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
6702 (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
6703 def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
6704 (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
6705 def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
6706 (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
// f16 <-> f32 lane conversions (narrowing/lengthening); only need the
// storage-only FP16 extension (HasFP16), not HasFullFP16.
6709 // VCVT : Vector Convert Between Half-Precision and Single-Precision.
6710 def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
6711 IIC_VUNAQ, "vcvt", "f16.f32",
6712 v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
6713 Requires<[HasNEON, HasFP16]>;
6714 def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
6715 IIC_VUNAQ, "vcvt", "f32.f16",
6716 v4f32, v4i16, int_arm_neon_vcvthf2fp>,
6717 Requires<[HasNEON, HasFP16]>;
// VREV{64,32,16}: reverse elements within 64/32/16-bit containers. Each has
// a D-register class and a Q-register class, with op19_18 selecting the
// element size; only sizes smaller than the container are defined.
6721 // VREV64 : Vector Reverse elements within 64-bit doublewords
6723 class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6724 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
6725 (ins DPR:$Vm), IIC_VMOVD,
6726 OpcodeStr, Dt, "$Vd, $Vm", "",
6727 [(set DPR:$Vd, (Ty (ARMvrev64 (Ty DPR:$Vm))))]>;
6728 class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6729 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
6730 (ins QPR:$Vm), IIC_VMOVQ,
6731 OpcodeStr, Dt, "$Vd, $Vm", "",
6732 [(set QPR:$Vd, (Ty (ARMvrev64 (Ty QPR:$Vm))))]>;
6734 def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
6735 def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
6736 def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
// Floating-point vectors reuse the same-width integer encodings.
6737 let Predicates = [HasNEON] in {
6738 def : Pat<(v2f32 (ARMvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
6741 def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
6742 def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
6743 def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
6745 let Predicates = [HasNEON] in {
6746 def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
6747 def : Pat<(v8f16 (ARMvrev64 (v8f16 QPR:$Vm))), (VREV64q16 QPR:$Vm)>;
6748 def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))), (VREV64d16 DPR:$Vm)>;
6751 // VREV32 : Vector Reverse elements within 32-bit words
6753 class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6754 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
6755 (ins DPR:$Vm), IIC_VMOVD,
6756 OpcodeStr, Dt, "$Vd, $Vm", "",
6757 [(set DPR:$Vd, (Ty (ARMvrev32 (Ty DPR:$Vm))))]>;
6758 class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6759 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
6760 (ins QPR:$Vm), IIC_VMOVQ,
6761 OpcodeStr, Dt, "$Vd, $Vm", "",
6762 [(set QPR:$Vd, (Ty (ARMvrev32 (Ty QPR:$Vm))))]>;
6764 def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
6765 def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
6767 def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
6768 def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
6770 // VREV16 : Vector Reverse elements within 16-bit halfwords
6772 class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6773 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
6774 (ins DPR:$Vm), IIC_VMOVD,
6775 OpcodeStr, Dt, "$Vd, $Vm", "",
6776 [(set DPR:$Vd, (Ty (ARMvrev16 (Ty DPR:$Vm))))]>;
6777 class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6778 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
6779 (ins QPR:$Vm), IIC_VMOVQ,
6780 OpcodeStr, Dt, "$Vd, $Vm", "",
6781 [(set QPR:$Vd, (Ty (ARMvrev16 (Ty QPR:$Vm))))]>;
6783 def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
6784 def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
6786 // Other Vector Shuffles.
6788 // Aligned extractions: really just dropping registers
// Extracting an aligned half of a Q register needs no instruction at all;
// it folds to an EXTRACT_SUBREG of the right D subregister, with LaneCVT
// turning the start index into the subregister index.
6790 class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
6791 : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
6792 (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>,
6793 Requires<[HasNEON]>;
6795 def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
6797 def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
6799 def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
6801 def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
6803 def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
6805 def : AlignedVEXTq<v4f16, v8f16, DSubReg_i16_reg>; // v8f16 -> v4f16
6807 // VEXT : Vector Extract
6810 // All of these have a two-operand InstAlias.
6811 let TwoOperandAliasConstraint = "$Vn = $Vd" in {
// D-register form; $index is encoded in Inst{10-8}. Each def below narrows
// the encoded index field to match the element size (wider elements use
// fewer index bits, with the low bits forced to zero).
6812 class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
6813 : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
6814 (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
6815 IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
6816 [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
6817 (Ty DPR:$Vm), imm:$index)))]> {
6820 let Inst{10-8} = index{2-0};
// Q-register form; $index occupies Inst{11-8}.
6823 class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
6824 : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
6825 (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
6826 IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
6827 [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
6828 (Ty QPR:$Vm), imm:$index)))]> {
6830 let Inst{11-8} = index{3-0};
6834 def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
6835 let Inst{10-8} = index{2-0};
6837 def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
6838 let Inst{10-9} = index{1-0};
// No dedicated f16 instruction: reuse the i16 VEXT encoding for v4f16.
6841 let Predicates = [HasNEON] in {
6842 def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
6843 (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
6846 def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
6847 let Inst{10} = index{0};
6848 let Inst{9-8} = 0b00;
// v2f32 reuses the i32 encoding, as above.
6850 let Predicates = [HasNEON] in {
6851 def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
6852 (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
6855 def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
6856 let Inst{11-8} = index{3-0};
6858 def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
6859 let Inst{11-9} = index{2-0};
// v8f16 reuses the i16 Q-register encoding.
6862 let Predicates = [HasNEON] in {
6863 def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
6864 (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
6867 def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
6868 let Inst{11-10} = index{1-0};
6869 let Inst{9-8} = 0b00;
6871 def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
6872 let Inst{11} = index{0};
6873 let Inst{10-8} = 0b000;
// v4f32 reuses the i32 Q-register encoding.
6875 let Predicates = [HasNEON] in {
6876 def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
6877 (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
6880 // VTRN : Vector Transpose
6882 def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
6883 def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
6884 def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
6886 def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
6887 def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
6888 def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
6890 // VUZP : Vector Unzip (Deinterleave)
6892 def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
6893 def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
6894 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
// There is no VUZPd32 def; the assembler alias below maps the .32 D-register
// spelling directly onto VTRNd32.
6895 def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
6896 (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
6898 def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
6899 def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
6900 def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
6902 // VZIP : Vector Zip (Interleave)
6904 def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
6905 def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
6906 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
// Same treatment as vuzp.32 above: alias onto VTRNd32.
6907 def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
6908 (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
6910 def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
6911 def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
6912 def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
6914 // Vector Table Lookup and Table Extension.
6916 // VTBL : Vector Table Lookup
// All forms share a custom decoder. The opcode field 0b1000..0b1011 selects
// the one-, two-, three- and four-D-register table variants; the multi-
// register forms constrain register allocation via hasExtraSrcRegAllocReq.
// NOTE(review): several `def` name lines are not visible in this listing;
// the bodies below are the one/two/three/four-register VTBL and VTBX forms.
6917 let DecoderMethod = "DecodeTBLInstruction" in {
6919 : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
6920 (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
6921 "vtbl", "8", "$Vd, $Vn, $Vm", "",
6922 [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
6924 let hasExtraSrcRegAllocReq = 1 in {
6926 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
6927 (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
6928 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
6930 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
6931 (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
6932 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
6934 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
6935 (ins VecListFourD:$Vn, DPR:$Vm),
6937 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
6938 } // hasExtraSrcRegAllocReq = 1
// Pseudos taking the table as one QQPR super-register, expanded later.
6941 : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
6943 : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
6945 // VTBX : Vector Table Extension
// Unlike VTBL, VTBX ties $orig to $Vd so out-of-range lanes keep the
// original destination contents (see the "$orig = $Vd" constraint).
6947 : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
6948 (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
6949 "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
6950 [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
6951 DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
6952 let hasExtraSrcRegAllocReq = 1 in {
6954 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
6955 (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
6956 "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
6958 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
6959 (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
6960 NVTBLFrm, IIC_VTBX3,
6961 "vtbx", "8", "$Vd, $Vn, $Vm",
6964 : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
6965 (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
6966 "vtbx", "8", "$Vd, $Vn, $Vm",
6968 } // hasExtraSrcRegAllocReq = 1
// VTBX pseudos, mirroring the VTBL pseudos but carrying $orig tied to $dst.
6971 : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
6972 IIC_VTBX3, "$orig = $dst", []>;
6974 : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
6975 IIC_VTBX4, "$orig = $dst", []>;
6976 } // DecoderMethod = "DecodeTBLInstruction"
// Selection patterns lowering the vtbl/vtbx intrinsics: the separate v8i8
// table operands are gathered with REG_SEQUENCE into the DPair/QQPR
// super-register the instruction (or pseudo) expects. Unused QQPR slots are
// filled with IMPLICIT_DEF.
6978 let Predicates = [HasNEON] in {
6979 def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
6980 (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
6983 def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
6985 (v8i8 (VTBX2 v8i8:$orig,
6986 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
6990 def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
6991 v8i8:$Vn2, v8i8:$Vm)),
6992 (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
6995 (v8i8 (IMPLICIT_DEF)), dsub_3),
6997 def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
6998 v8i8:$Vn2, v8i8:$Vm)),
6999 (v8i8 (VTBX3Pseudo v8i8:$orig,
7000 (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
7003 (v8i8 (IMPLICIT_DEF)), dsub_3),
7006 def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
7007 v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
7008 (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
7013 def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
7014 v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
7015 (v8i8 (VTBX4Pseudo v8i8:$orig,
7016 (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
7023 // VRINT : Vector Rounding
// Instantiates the four register/type variants of one vrint rounding mode:
// Df/Qf for f32 (HasV8+NEON) and Dh/Qh for f16 (additionally HasFullFP16).
// op9_7 encodes the rounding mode; the aliases accept the two-type
// ".f32.f32"/".f16.f16" assembly spelling.
7024 multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
7025 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
7026 def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
7027 !strconcat("vrint", op), "f32",
7028 v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
7029 let Inst{9-7} = op9_7;
7031 def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
7032 !strconcat("vrint", op), "f32",
7033 v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
7034 let Inst{9-7} = op9_7;
7036 def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
7037 !strconcat("vrint", op), "f16",
7039 Requires<[HasV8, HasNEON, HasFullFP16]> {
7040 let Inst{9-7} = op9_7;
7042 def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
7043 !strconcat("vrint", op), "f16",
7045 Requires<[HasV8, HasNEON, HasFullFP16]> {
7046 let Inst{9-7} = op9_7;
7050 def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
7051 (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
7052 def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
7053 (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
7054 let Predicates = [HasNEON, HasFullFP16] in {
7055 def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
7056 (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
7057 def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
7058 (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
// One instantiation per rounding mode; op9_7 values are not contiguous
// (n=000, x=001, a=010, z=011, m=101, p=111).
7062 defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
7063 defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
7064 defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
7065 defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
7066 defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
7067 defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
7069 // Cryptography instructions
// AES operates on v16i8, SHA on v4i32; all require HasV8+HasCrypto.
// AES/N2SHA are one-operand-class forms, AES2Op/N2SHA2Op are two-operand
// (accumulating) forms, and N3SHA3Op is the three-register SHA form.
7070 let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
7071 DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
7072 class AES<string op, bit op7, bit op6, SDPatternOperator Int>
7073 : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
7074 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
7075 Requires<[HasV8, HasCrypto]>;
7076 class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
7077 : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
7078 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
7079 Requires<[HasV8, HasCrypto]>;
7080 class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
7081 SDPatternOperator Int>
7082 : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
7083 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
7084 Requires<[HasV8, HasCrypto]>;
7085 class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
7086 SDPatternOperator Int>
7087 : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
7088 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
7089 Requires<[HasV8, HasCrypto]>;
7090 class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
7091 : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
7092 !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
7093 Requires<[HasV8, HasCrypto]>;
7096 def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
7097 def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
7098 def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
7099 def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
// SHA1H/SHA1C/SHA1M/SHA1P take null_frag: their i32 hash_e operand needs the
// custom SUBREG_TO_REG patterns defined after this block instead of a direct
// intrinsic pattern.
7101 def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
7102 def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
7103 def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
7104 def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
7105 def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
7106 def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
7107 def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
7108 def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
7109 def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
7110 def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
// Patterns for the SHA1 intrinsics whose hash_e operand is a scalar i32:
// the GPR value is moved into an SPR, widened to the vector register with
// SUBREG_TO_REG, and (for sha1h) the scalar result is extracted back out.
7112 let Predicates = [HasNEON] in {
7113 def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
7114 (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
7115 (SHA1H (SUBREG_TO_REG (i64 0),
7116 (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
7120 def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
7121 (SHA1C v4i32:$hash_abcd,
7122 (SUBREG_TO_REG (i64 0),
7123 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
7127 def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
7128 (SHA1M v4i32:$hash_abcd,
7129 (SUBREG_TO_REG (i64 0),
7130 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
7134 def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
7135 (SHA1P v4i32:$hash_abcd,
7136 (SUBREG_TO_REG (i64 0),
7137 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
7142 //===----------------------------------------------------------------------===//
7143 // NEON instructions for single-precision FP math
7144 //===----------------------------------------------------------------------===//
// These classes implement scalar f32 (and f16) arithmetic on NEON vector
// instructions: the scalar is inserted into lane 0 of an IMPLICIT_DEF vector
// (restricted to DPR_VFP2 so lane 0 aliases an SPR), the vector op runs, and
// lane 0 (ssub_0) is extracted back as the scalar result.
7146 class N2VSPat<SDNode OpNode, NeonI Inst>
7147 : NEONFPPat<(f32 (OpNode SPR:$a)),
7149 (v2f32 (COPY_TO_REGCLASS (Inst
7151 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7152 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
7154 class N3VSPat<SDNode OpNode, NeonI Inst>
7155 : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
7157 (v2f32 (COPY_TO_REGCLASS (Inst
7159 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7162 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7163 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
// Half-precision analogue of N3VSPat, using HPR scalars in v4f16 vectors.
7165 class N3VSPatFP16<SDNode OpNode, NeonI Inst>
7166 : NEONFPPat<(f16 (OpNode HPR:$a, HPR:$b)),
7168 (v4f16 (COPY_TO_REGCLASS (Inst
7170 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
7173 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
7174 HPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
// Fused/accumulating form: acc, a and b are each inserted into lane 0.
7176 class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
7177 : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
7179 (v2f32 (COPY_TO_REGCLASS (Inst
7181 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7184 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7187 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7188 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
// int->fp: the GPR input is first copied into an SPR for the lane insert.
7190 class NVCVTIFPat<SDNode OpNode, NeonI Inst>
7191 : NEONFPPat<(f32 (OpNode GPR:$a)),
7192 (f32 (EXTRACT_SUBREG
7195 (v2f32 (IMPLICIT_DEF)),
7196 (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
7198 class NVCVTFIPat<SDNode OpNode, NeonI Inst>
7199 : NEONFPPat<(i32 (OpNode SPR:$a)),
7200 (i32 (EXTRACT_SUBREG
7201 (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
// Instantiations tying scalar FP nodes to the v2f32 NEON instructions.
7205 def : N3VSPat<fadd, VADDfd>;
7206 def : N3VSPat<fsub, VSUBfd>;
7207 def : N3VSPat<fmul, VMULfd>;
7208 def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
7209 Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
7210 def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
7211 Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
7212 def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
7213 Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
7214 def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
7215 Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
7216 def : N2VSPat<fabs, VABSfd>;
7217 def : N2VSPat<fneg, VNEGfd>;
7218 def : N3VSPatFP16<fmaximum, VMAXhd>, Requires<[HasFullFP16]>;
7219 def : N3VSPatFP16<fminimum, VMINhd>, Requires<[HasFullFP16]>;
7220 def : N3VSPat<fmaximum, VMAXfd>, Requires<[HasNEON]>;
7221 def : N3VSPat<fminimum, VMINfd>, Requires<[HasNEON]>;
7222 def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
7223 def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
7224 def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
7225 def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;
7227 // NEON doesn't have any f64 conversions, so provide patterns to make
7228 // sure the VFP conversions match when extracting from a vector.
// Extract the selected i32 lane as an SPR subregister, then use the VFP
// scalar int->f64 conversion (VSITOD / VUITOD) directly on it.
7229 def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
7230 (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7231 def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
7232 (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7233 def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
7234 (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7235 def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
7236 (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7239 // Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
// VMOVDRR duplicates $a into both halves of a D register; ssub_0 then reads
// the low 32 bits back as the f32/SPR result.
7240 def : Pat<(f32 (bitconvert GPR:$a)),
7241 (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
7242 Requires<[HasNEON, DontUseVMOVSR]>;
7243 def : Pat<(arm_vmovsr GPR:$a),
7244 (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
7245 Requires<[HasNEON, DontUseVMOVSR]>;
7247 //===----------------------------------------------------------------------===//
7248 // Non-Instruction Patterns and Endianness Reversal Patterns
7249 //===----------------------------------------------------------------------===//
7252 // 64 bit conversions
// Bitconverts between types with the same element size need no lane
// reversal in either endianness, so they map directly to the source
// register (no instruction emitted).
7253 let Predicates = [HasNEON] in {
7254 def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
7255 def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
7257 def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
7258 def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
7260 def : Pat<(v4i16 (bitconvert (v4f16 DPR:$src))), (v4i16 DPR:$src)>;
7261 def : Pat<(v4f16 (bitconvert (v4i16 DPR:$src))), (v4f16 DPR:$src)>;
7263 // 128 bit conversions
7264 def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
7265 def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
7267 def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
7268 def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
7270 def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16 QPR:$src)>;
7271 def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16 QPR:$src)>;
// Little-endian: every bitconvert between differing element sizes is also a
// register-reinterpretation no-op, so all remaining type pairs map straight
// to the source register. (The IsBE block below uses VREV instead.)
7274 let Predicates = [IsLE,HasNEON] in {
7275 // 64 bit conversions
7276 def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
7277 def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
7278 def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (f64 DPR:$src)>;
7279 def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
7280 def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
7282 def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
7283 def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
7284 def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (v1i64 DPR:$src)>;
7285 def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
7286 def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
7288 def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
7289 def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
7290 def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (v2f32 DPR:$src)>;
7291 def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
7292 def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
7294 def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
7295 def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
7296 def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (v2i32 DPR:$src)>;
7297 def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
7298 def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
7300 def : Pat<(v4f16 (bitconvert (f64 DPR:$src))), (v4f16 DPR:$src)>;
7301 def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (v4f16 DPR:$src)>;
7302 def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (v4f16 DPR:$src)>;
7303 def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (v4f16 DPR:$src)>;
7304 def : Pat<(v4f16 (bitconvert (v8i8 DPR:$src))), (v4f16 DPR:$src)>;
7306 def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
7307 def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
7308 def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
7309 def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
7310 def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
7312 def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
7313 def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
7314 def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
7315 def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
7316 def : Pat<(v8i8 (bitconvert (v4f16 DPR:$src))), (v8i8 DPR:$src)>;
7317 def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
7319 // 128 bit conversions
7320 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
7321 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
7322 def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
7323 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
7324 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
7326 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
7327 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
7328 def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>;
7329 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
7330 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
7332 def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
7333 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
7334 def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>;
7335 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
7336 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
7338 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
7339 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
7340 def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>;
7341 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
7342 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
7344 def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
7345 def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>;
7346 def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>;
7347 def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>;
7348 def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>;
7350 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
7351 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
7352 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
7353 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
7354 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
7356 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
7357 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
7358 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
7359 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
7360 def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>;
7361 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
// Big-endian: a bitconvert between different element sizes must reverse the
// in-register lane order. The VREV width used is the larger of the two
// element sizes involved (e.g. i32<->i16 uses VREV32, i16<->i8 uses VREV16,
// anything involving a 64-bit scalar/element uses VREV64).
7364 let Predicates = [IsBE,HasNEON] in {
7365 // 64 bit conversions
7366 def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
7367 def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
7368 def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
7369 def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
7370 def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
7372 def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
7373 def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
7374 def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
7375 def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
7376 def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
7378 def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
7379 def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
7380 def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
7381 def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
7382 def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
7384 def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
7385 def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
7386 def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
7387 def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
7388 def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
7390 def : Pat<(v4f16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
7391 def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
7392 def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
7393 def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
7394 def : Pat<(v4f16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
7396 def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
7397 def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
7398 def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
7399 def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
7400 def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
7402 def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>;
7403 def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>;
7404 def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>;
7405 def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>;
7406 def : Pat<(v8i8 (bitconvert (v4f16 DPR:$src))), (VREV16d8 DPR:$src)>;
7407 def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>;
7409 // 128 bit conversions
7410 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
7411 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
7412 def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
7413 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
7414 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
7416 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
7417 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
7418 def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
7419 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
7420 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
7422 def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
7423 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
7424 def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
7425 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
7426 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
7428 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
7429 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
7430 def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
7431 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
7432 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
7434 def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
7435 def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
7436 def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
7437 def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
7438 def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
7440 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
7441 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
7442 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
7443 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
7444 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
7446 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>;
7447 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>;
7448 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>;
7449 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>;
7450 def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (VREV16q8 QPR:$src)>;
7451 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>;
7454 // Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
// Load/store at the access's native element width, then VREV64 to restore
// the 64-bit element lane order the rest of the BE patterns expect.
7455 let Predicates = [IsBE,HasNEON] in {
7456 def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
7457 (VREV64q8 (VLD1q8 addrmode6:$addr))>;
7458 def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
7459 (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>;
7460 def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
7461 (VREV64q16 (VLD1q16 addrmode6:$addr))>;
7462 def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
7463 (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>;
7466 // Fold extracting an element out of a v2i32 into a vfp register.
// The bitconvert+extractelt pair is free: the lane is already an SPR
// subregister of the D register, so just name it with EXTRACT_SUBREG.
7467 def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
7468 (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>,
7469 Requires<[HasNEON]>;
7471 // Vector lengthening move with load, matching extending loads.
7473 // extload, zextload and sextload for a standard lengthening load. Example:
7474 // Lengthen_Single<"8", "i16", "8"> =
7475 // Pat<(v8i16 (extloadvi8 addrmode6:$addr))
7476 // (VMOVLuv8i16 (VLD1d8 addrmode6:$addr,
7477 // (f64 (IMPLICIT_DEF)), (i32 0)))>;
// Emits three patterns (_Any/_Z/_S) mapping any/zero/sign-extending loads
// to VLD1 followed by the unsigned or signed VMOVL. Names are assembled
// with string concatenation, so DestLanes/DestTy/SrcTy must match existing
// VMOVL/VLD1 def suffixes.
7478 multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
7479 let AddedComplexity = 10 in {
// Anyext: extension garbage is acceptable, so the unsigned form is used.
7480 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7481 (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
7482 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
7483 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
7484 Requires<[HasNEON]>;
7486 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7487 (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
7488 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
7489 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
7490 Requires<[HasNEON]>;
7492 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7493 (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
7494 (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
7495 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
7496 Requires<[HasNEON]>;
7500 // extload, zextload and sextload for a lengthening load which only uses
7501 // half the lanes available. Example:
7502 // Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
7503 // Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
7504 // (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
7505 // (f64 (IMPLICIT_DEF)), (i32 0))),
// The 32 bits of source data are loaded into lane 0 with VLD1LNd32, widened
// with a full-width VMOVL (InsnLanes x InsnTy), and the useful half is taken
// back out with EXTRACT_SUBREG.
7507 multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
7508 string InsnLanes, string InsnTy> {
7509 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7510 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7511 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7512 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7514 Requires<[HasNEON]>;
7515 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7516 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7517 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7518 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7520 Requires<[HasNEON]>;
7521 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7522 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7523 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
7524 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7526 Requires<[HasNEON]>;
7529 // The following class definition is basically a copy of the
7530 // Lengthen_HalfSingle definition above, however with an additional parameter
7531 // "RevLanes" to select the correct VREV32dXX instruction. This is to convert
7532 // data loaded by VLD1LN into proper vector format in big endian mode.
// Identical structure to Lengthen_HalfSingle, with a VREV32d<RevLanes>
// inserted between the lane load and the VMOVL to fix BE lane order.
7533 multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy,
7534 string InsnLanes, string InsnTy, string RevLanes> {
7535 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7536 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7537 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7538 (!cast<Instruction>("VREV32d" # RevLanes)
7539 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7541 Requires<[HasNEON]>;
7542 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7543 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7544 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7545 (!cast<Instruction>("VREV32d" # RevLanes)
7546 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7548 Requires<[HasNEON]>;
7549 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7550 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7551 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
7552 (!cast<Instruction>("VREV32d" # RevLanes)
7553 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7555 Requires<[HasNEON]>;
7558 // extload, zextload and sextload for a lengthening load followed by another
7559 // lengthening load, to quadruple the initial length.
7561 // Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
7562 // Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr))
7563 // (EXTRACT_SUBREG (VMOVLuv4i32
7564 // (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
7565 // (f64 (IMPLICIT_DEF)),
7569 multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
7570 string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
// Insn1Lanes/Insn1Ty select the first (inner) VMOVL step and
// Insn2Lanes/Insn2Ty the second (outer) step of the double extension.
// _Any: anyext load — two chained unsigned VMOVL steps.
7572 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7573 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7574 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7575 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7576 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7578 Requires<[HasNEON]>;
// _Z: zero-extending load (VMOVLu at both steps).
7579 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7580 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7581 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7582 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7583 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7585 Requires<[HasNEON]>;
// _S: sign-extending load (VMOVLs at both steps).
7586 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7587 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7588 (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7589 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7590 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7592 Requires<[HasNEON]>;
7595 // The following class definition is basically a copy of the
7596 // Lengthen_Double definition above, however with an additional parameter
7597 // "RevLanes" to select the correct VREV32dXX instruction. This is to convert
7598 // data loaded by VLD1LN into proper vector format in big endian mode.
7599 multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy,
7600 string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7601 string Insn2Ty, string RevLanes> {
// Same double-VMOVL structure as Lengthen_Double, with a VREV32d # RevLanes
// inserted directly after VLD1LNd32 to fix lane order on big-endian targets.
// _Any: anyext load.
7602 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7603 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7604 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7605 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7606 (!cast<Instruction>("VREV32d" # RevLanes)
7607 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7609 Requires<[HasNEON]>;
// _Z: zero-extending load.
7610 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7611 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7612 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7613 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7614 (!cast<Instruction>("VREV32d" # RevLanes)
7615 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7617 Requires<[HasNEON]>;
// _S: sign-extending load.
7618 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7619 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7620 (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7621 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7622 (!cast<Instruction>("VREV32d" # RevLanes)
7623 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7625 Requires<[HasNEON]>;
7628 // extload, zextload and sextload for a lengthening load followed by another
7629 // lengthening load, to quadruple the initial length, but which ends up only
7630 // requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
7632 // Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
7633 // Pat<(v2i32 (extloadvi8 addrmode6:$addr))
7634 // (EXTRACT_SUBREG (VMOVLuv4i32
7635 // (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
7636 // (f64 (IMPLICIT_DEF)), (i32 0))),
7639 multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
7640 string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
// Uses a 16-bit lane load (VLD1LNd16) rather than 32-bit, since only half
// the lanes of the final vector are needed.
// _Any: anyext load — two chained unsigned VMOVL steps.
7642 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7643 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
7644 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7645 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7646 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7649 Requires<[HasNEON]>;
// _Z: zero-extending load.
7650 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7651 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
7652 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7653 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7654 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7657 Requires<[HasNEON]>;
// _S: sign-extending load.
7658 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7659 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
7660 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7661 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7662 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7665 Requires<[HasNEON]>;
7668 // The following class definition is basically a copy of the
7669 // Lengthen_HalfDouble definition above, however with an additional VREV16d8
7670 // instruction to convert data loaded by VLD1LN into proper vector format
7671 // in big endian mode.
7672 multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy,
7673 string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
// VREV16d8 is hard-coded (not parameterized) since the lane load is always
// a 16-bit load of byte elements here.
// _Any: anyext load.
7675 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7676 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
7677 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7678 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7679 (!cast<Instruction>("VREV16d8")
7680 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7683 Requires<[HasNEON]>;
// _Z: zero-extending load.
7684 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7685 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
7686 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7687 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7688 (!cast<Instruction>("VREV16d8")
7689 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7692 Requires<[HasNEON]>;
// _S: sign-extending load.
7693 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7694 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
7695 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7696 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7697 (!cast<Instruction>("VREV16d8")
7698 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7701 Requires<[HasNEON]>;
// Instantiate the lengthening-load multiclasses above. Single lengthening
// is endian-neutral; the Half/Double variants have separate little-endian
// (IsLE) and big-endian (IsBE, with VREV fixups) instantiations.
7704 defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
7705 defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
7706 defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
7708 let Predicates = [HasNEON,IsLE] in {
7709 defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
7710 defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
7712 // Double lengthening - v4i8 -> v4i16 -> v4i32
7713 defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
7714 // v2i8 -> v2i16 -> v2i32
7715 defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
7716 // v2i16 -> v2i32 -> v2i64
7717 defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
7720 let Predicates = [HasNEON,IsBE] in {
7721 defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
7722 defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32
7724 // Double lengthening - v4i8 -> v4i16 -> v4i32
7725 defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
7726 // v2i8 -> v2i16 -> v2i32
7727 defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
7728 // v2i16 -> v2i32 -> v2i64
7729 defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
7732 // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
// Little-endian: 16-bit lane load, then three chained VMOVL steps
// (unsigned for ext/zext, signed for sext), narrowing back to the D
// subregister (dsub_0) between steps.
7733 let Predicates = [HasNEON,IsLE] in {
7734 def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
7735 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7736 (VLD1LNd16 addrmode6:$addr,
7737 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
7738 def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
7739 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7740 (VLD1LNd16 addrmode6:$addr,
7741 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
7742 def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
7743 (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
7744 (VLD1LNd16 addrmode6:$addr,
7745 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
7747 // The following patterns are basically a copy of the patterns above,
7748 // however with an additional VREV16d instruction to convert data
7749 // loaded by VLD1LN into proper vector format in big endian mode.
7750 let Predicates = [HasNEON,IsBE] in {
7751 def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
7752 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7753 (!cast<Instruction>("VREV16d8")
7754 (VLD1LNd16 addrmode6:$addr,
7755 (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
7756 def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
7757 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7758 (!cast<Instruction>("VREV16d8")
7759 (VLD1LNd16 addrmode6:$addr,
7760 (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
7761 def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
7762 (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
7763 (!cast<Instruction>("VREV16d8")
7764 (VLD1LNd16 addrmode6:$addr,
7765 (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
// concat_vectors of two D registers is simply the containing Q register:
// place $Dn in the low half (dsub_0) and $Dm in the high half (dsub_1)
// via REG_SEQUENCE — no data movement is required.
7768 let Predicates = [HasNEON] in {
7769 def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
7770 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7771 def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
7772 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7773 def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)),
7774 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7775 def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
7776 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7777 def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
7778 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7779 def : Pat<(v8f16 (concat_vectors DPR:$Dn, DPR:$Dm)),
7780 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7783 //===----------------------------------------------------------------------===//
7784 // Assembler aliases
// fmdhr/fmdlr map to VSETLNi32: fmdhr writes lane 1 (high word) of the D
// register, fmdlr writes lane 0 (low word).
7787 def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
7788 (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
7789 def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
7790 (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
7792 // VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
7793 defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
7794 (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7795 defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
7796 (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7797 defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
7798 (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7799 defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
7800 (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7801 defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
7802 (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7803 defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
7804 (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7805 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
7806 (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7807 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
7808 (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7809 // ... two-operand aliases
// "$Vdn, $Vm" forms: the destination register doubles as the first source.
7810 defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
7811 (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
7812 defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
7813 (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
7814 defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
7815 (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
7816 defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
7817 (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
7818 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
7819 (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
7820 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
7821 (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// vand with an immediate is accepted as VBIC of the complemented splat
// immediate (the nImmSplatNot* operand classes match the inverted value).
7823 def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
7824 (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
7825 def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
7826 (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
7827 def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
7828 (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
7829 def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
7830 (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
7833 // VLD1 single-lane pseudo-instructions. These need special handling for
7834 // the lane index that an InstAlias can't handle, so we use these instead.
// Plain (no-writeback) forms; alignment requirement scales with element size.
7835 def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
7836 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7838 def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
7839 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7841 def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
7842 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
// Writeback forms with "$addr!" syntax (fixed post-increment).
7845 def VLD1LNdWB_fixed_Asm_8 :
7846 NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
7847 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7849 def VLD1LNdWB_fixed_Asm_16 :
7850 NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
7851 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7853 def VLD1LNdWB_fixed_Asm_32 :
7854 NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
7855 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
// Writeback forms with a register offset ("$addr, $Rm").
7857 def VLD1LNdWB_register_Asm_8 :
7858 NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
7859 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7860 rGPR:$Rm, pred:$p)>;
7861 def VLD1LNdWB_register_Asm_16 :
7862 NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
7863 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7864 rGPR:$Rm, pred:$p)>;
7865 def VLD1LNdWB_register_Asm_32 :
7866 NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
7867 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7868 rGPR:$Rm, pred:$p)>;
7871 // VST1 single-lane pseudo-instructions. These need special handling for
7872 // the lane index that an InstAlias can't handle, so we use these instead.
// Mirrors the VLD1 single-lane pseudos above: plain, fixed-writeback
// ("$addr!"), and register-writeback ("$addr, $Rm") forms per element size.
7873 def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
7874 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7876 def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
7877 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7879 def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
7880 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7883 def VST1LNdWB_fixed_Asm_8 :
7884 NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
7885 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7887 def VST1LNdWB_fixed_Asm_16 :
7888 NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
7889 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7891 def VST1LNdWB_fixed_Asm_32 :
7892 NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
7893 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7895 def VST1LNdWB_register_Asm_8 :
7896 NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
7897 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7898 rGPR:$Rm, pred:$p)>;
7899 def VST1LNdWB_register_Asm_16 :
7900 NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
7901 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7902 rGPR:$Rm, pred:$p)>;
7903 def VST1LNdWB_register_Asm_32 :
7904 NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
7905 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7906 rGPR:$Rm, pred:$p)>;
7908 // VLD2 single-lane pseudo-instructions. These need special handling for
7909 // the lane index that an InstAlias can't handle, so we use these instead.
// 'd' variants use two consecutive D registers; 'q' variants (16/32-bit
// elements only) use the even/odd-spaced Q forms.
7910 def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
7911 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
7913 def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
7914 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
7916 def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
7917 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
7918 def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
7919 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
7921 def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
7922 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
// Writeback forms: "$addr!" (fixed) below, "$addr, $Rm" (register) after.
7925 def VLD2LNdWB_fixed_Asm_8 :
7926 NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
7927 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
7929 def VLD2LNdWB_fixed_Asm_16 :
7930 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
7931 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
7933 def VLD2LNdWB_fixed_Asm_32 :
7934 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
7935 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
7937 def VLD2LNqWB_fixed_Asm_16 :
7938 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
7939 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
7941 def VLD2LNqWB_fixed_Asm_32 :
7942 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
7943 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
7945 def VLD2LNdWB_register_Asm_8 :
7946 NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
7947 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
7948 rGPR:$Rm, pred:$p)>;
7949 def VLD2LNdWB_register_Asm_16 :
7950 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
7951 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
7952 rGPR:$Rm, pred:$p)>;
7953 def VLD2LNdWB_register_Asm_32 :
7954 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
7955 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
7956 rGPR:$Rm, pred:$p)>;
7957 def VLD2LNqWB_register_Asm_16 :
7958 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
7959 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
7960 rGPR:$Rm, pred:$p)>;
7961 def VLD2LNqWB_register_Asm_32 :
7962 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
7963 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
7964 rGPR:$Rm, pred:$p)>;
7967 // VST2 single-lane pseudo-instructions. These need special handling for
7968 // the lane index that an InstAlias can't handle, so we use these instead.
// Mirrors the VLD2 single-lane pseudos: plain, fixed-writeback ("$addr!"),
// and register-writeback ("$addr, $Rm") forms for 'd' and 'q' lists.
7969 def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
7970 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
7972 def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
7973 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
7975 def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
7976 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
7978 def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
7979 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
7981 def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
7982 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
7985 def VST2LNdWB_fixed_Asm_8 :
7986 NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
7987 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
7989 def VST2LNdWB_fixed_Asm_16 :
7990 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
7991 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
7993 def VST2LNdWB_fixed_Asm_32 :
7994 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
7995 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
7997 def VST2LNqWB_fixed_Asm_16 :
7998 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
7999 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
8001 def VST2LNqWB_fixed_Asm_32 :
8002 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
8003 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
8005 def VST2LNdWB_register_Asm_8 :
8006 NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
8007 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
8008 rGPR:$Rm, pred:$p)>;
8009 def VST2LNdWB_register_Asm_16 :
8010 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
8011 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
8012 rGPR:$Rm, pred:$p)>;
8013 def VST2LNdWB_register_Asm_32 :
8014 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
8015 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
8016 rGPR:$Rm, pred:$p)>;
8017 def VST2LNqWB_register_Asm_16 :
8018 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
8019 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
8020 rGPR:$Rm, pred:$p)>;
8021 def VST2LNqWB_register_Asm_32 :
8022 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
8023 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
8024 rGPR:$Rm, pred:$p)>;
8026 // VLD3 all-lanes pseudo-instructions. These need special handling for
8027 // the lane index that an InstAlias can't handle, so we use these instead.
// All-lanes (dup) loads: one element of each of three vectors is loaded
// and replicated. Plain, fixed-writeback ("$addr!"), and register-writeback
// ("$addr, $Rm") forms for both D-list and Q-list variants.
8028 def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
8029 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8031 def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
8032 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8034 def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
8035 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8037 def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
8038 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8040 def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
8041 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8043 def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
8044 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8047 def VLD3DUPdWB_fixed_Asm_8 :
8048 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
8049 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8051 def VLD3DUPdWB_fixed_Asm_16 :
8052 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
8053 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8055 def VLD3DUPdWB_fixed_Asm_32 :
8056 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
8057 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8059 def VLD3DUPqWB_fixed_Asm_8 :
8060 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
8061 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8063 def VLD3DUPqWB_fixed_Asm_16 :
8064 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
8065 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8067 def VLD3DUPqWB_fixed_Asm_32 :
8068 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
8069 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8071 def VLD3DUPdWB_register_Asm_8 :
8072 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
8073 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8074 rGPR:$Rm, pred:$p)>;
8075 def VLD3DUPdWB_register_Asm_16 :
8076 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
8077 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8078 rGPR:$Rm, pred:$p)>;
8079 def VLD3DUPdWB_register_Asm_32 :
8080 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
8081 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8082 rGPR:$Rm, pred:$p)>;
8083 def VLD3DUPqWB_register_Asm_8 :
8084 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
8085 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8086 rGPR:$Rm, pred:$p)>;
8087 def VLD3DUPqWB_register_Asm_16 :
8088 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
8089 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8090 rGPR:$Rm, pred:$p)>;
8091 def VLD3DUPqWB_register_Asm_32 :
8092 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
8093 (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8094 rGPR:$Rm, pred:$p)>;
8097 // VLD3 single-lane pseudo-instructions. These need special handling for
8098 // the lane index that an InstAlias can't handle, so we use these instead.
// Plain, fixed-writeback ("$addr!"), and register-writeback ("$addr, $Rm")
// forms; the 'q' variants exist for 16/32-bit elements only.
8099 def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
8100 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8102 def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
8103 (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
8105 def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
8106 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8108 def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
8109 (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
8111 def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
8112 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8115 def VLD3LNdWB_fixed_Asm_8 :
8116 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
8117 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8119 def VLD3LNdWB_fixed_Asm_16 :
8120 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
8121 (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
8123 def VLD3LNdWB_fixed_Asm_32 :
8124 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
8125 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8127 def VLD3LNqWB_fixed_Asm_16 :
8128 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
8129 (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
8131 def VLD3LNqWB_fixed_Asm_32 :
8132 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
8133 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8135 def VLD3LNdWB_register_Asm_8 :
8136 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
8137 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8138 rGPR:$Rm, pred:$p)>;
8139 def VLD3LNdWB_register_Asm_16 :
8140 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
8141 (ins VecListThreeDHWordIndexed:$list,
8142 addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
8143 def VLD3LNdWB_register_Asm_32 :
8144 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
8145 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8146 rGPR:$Rm, pred:$p)>;
8147 def VLD3LNqWB_register_Asm_16 :
8148 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
8149 (ins VecListThreeQHWordIndexed:$list,
8150 addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
8151 def VLD3LNqWB_register_Asm_32 :
8152 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
8153 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8154 rGPR:$Rm, pred:$p)>;
8156 // VLD3 multiple structure pseudo-instructions. These need special handling for
8157 // the vector operands that the normal instructions don't yet model.
8158 // FIXME: Remove these when the register classes and instructions are updated.
// All element sizes share the same list/addressing operands here
// (VecListThreeD or VecListThreeQ with 64-bit alignment).
8159 def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
8160 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8161 def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
8162 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8163 def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
8164 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8165 def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
8166 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8167 def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
8168 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8169 def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
8170 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
// Writeback forms: "$addr!" (fixed) below, "$addr, $Rm" (register) after.
8172 def VLD3dWB_fixed_Asm_8 :
8173 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
8174 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8175 def VLD3dWB_fixed_Asm_16 :
8176 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
8177 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8178 def VLD3dWB_fixed_Asm_32 :
8179 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
8180 (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8181 def VLD3qWB_fixed_Asm_8 :
8182 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
8183 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8184 def VLD3qWB_fixed_Asm_16 :
8185 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
8186 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8187 def VLD3qWB_fixed_Asm_32 :
8188 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
8189 (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8190 def VLD3dWB_register_Asm_8 :
8191 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
8192 (ins VecListThreeD:$list, addrmode6align64:$addr,
8193 rGPR:$Rm, pred:$p)>;
8194 def VLD3dWB_register_Asm_16 :
8195 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
8196 (ins VecListThreeD:$list, addrmode6align64:$addr,
8197 rGPR:$Rm, pred:$p)>;
8198 def VLD3dWB_register_Asm_32 :
8199 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
8200 (ins VecListThreeD:$list, addrmode6align64:$addr,
8201 rGPR:$Rm, pred:$p)>;
8202 def VLD3qWB_register_Asm_8 :
8203 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
8204 (ins VecListThreeQ:$list, addrmode6align64:$addr,
8205 rGPR:$Rm, pred:$p)>;
8206 def VLD3qWB_register_Asm_16 :
8207 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
8208 (ins VecListThreeQ:$list, addrmode6align64:$addr,
8209 rGPR:$Rm, pred:$p)>;
8210 def VLD3qWB_register_Asm_32 :
8211 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
8212 (ins VecListThreeQ:$list, addrmode6align64:$addr,
8213 rGPR:$Rm, pred:$p)>;
8215 // VST3 single-lane pseudo-instructions. These need special handling for
8216 // the lane index that an InstAlias can't handle, so we use these instead.
// Mirrors the VLD3 single-lane pseudos: plain, fixed-writeback ("$addr!"),
// and register-writeback ("$addr, $Rm") forms.
8217 def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
8218 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8220 def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
8221 (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
8223 def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
8224 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8226 def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
8227 (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
8229 def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
8230 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8233 def VST3LNdWB_fixed_Asm_8 :
8234 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
8235 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8237 def VST3LNdWB_fixed_Asm_16 :
8238 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
8239 (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
8241 def VST3LNdWB_fixed_Asm_32 :
8242 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
8243 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8245 def VST3LNqWB_fixed_Asm_16 :
8246 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
8247 (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
8249 def VST3LNqWB_fixed_Asm_32 :
8250 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
8251 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8253 def VST3LNdWB_register_Asm_8 :
8254 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
8255 (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8256 rGPR:$Rm, pred:$p)>;
8257 def VST3LNdWB_register_Asm_16 :
8258 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
8259 (ins VecListThreeDHWordIndexed:$list,
8260 addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
8261 def VST3LNdWB_register_Asm_32 :
8262 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
8263 (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8264 rGPR:$Rm, pred:$p)>;
8265 def VST3LNqWB_register_Asm_16 :
8266 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
8267 (ins VecListThreeQHWordIndexed:$list,
8268 addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
8269 def VST3LNqWB_register_Asm_32 :
8270 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
8271 (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8272 rGPR:$Rm, pred:$p)>;
// VST3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

def VST3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                (ins VecListThreeD:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeD:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeD:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                (ins VecListThreeQ:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeQ:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeQ:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;

def VLD4DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                (ins VecListFourDAllLanes:$list,
                     addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                (ins VecListFourQAllLanes:$list,
                     addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
// VLD4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;

def VLD4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                (ins VecListFourDWordIndexed:$list,
                     addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                (ins VecListFourQWordIndexed:$list,
                     addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
// VLD4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

def VLD4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;

def VST4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                (ins VecListFourDWordIndexed:$list,
                     addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                (ins VecListFourQWordIndexed:$list,
                     addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

def VST4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                     rGPR:$Rm, pred:$p)>;
// VMOV/VMVN takes an optional datatype suffix
// (vmov is encoded as VORR with identical source operands).
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                          (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                          (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                          (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                          (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;
// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
// (The original comment said VCLT/VCGT, but these aliases map "vcle" onto
// VCGE with $Dn/$Dm swapped.)
// D-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
                    (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
                    (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
                    (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
                    (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
                    (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                    (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                    (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
                    (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                    (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
                    (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
                    (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
                    (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
                    (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                    (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                    (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
                    (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
                    (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
                    (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
                    (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
                    (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
                    (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                    (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                    (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
                    (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                    (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
                    (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
                    (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
                    (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
                    (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                    (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                    (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
                    (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// VSWP allows, but does not require, a type suffix.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                          (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                          (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;
// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                          (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                          (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                          (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                          (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
8775 // "vmov Rd, #-imm" can be handled via "vmvn".
8776 def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
8777 (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
8778 def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
8779 (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
8780 def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
8781 (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
8782 def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
8783 (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
// these should restrict to just the Q register variants, but the register
// classes are enough to match correctly regardless, so we keep it simple
// and just use MnemonicAlias.
def : NEONMnemonicAlias<"vbicq", "vbic">;
def : NEONMnemonicAlias<"vandq", "vand">;
def : NEONMnemonicAlias<"veorq", "veor">;
def : NEONMnemonicAlias<"vorrq", "vorr">;

def : NEONMnemonicAlias<"vmovq", "vmov">;
def : NEONMnemonicAlias<"vmvnq", "vmvn">;
// Explicit versions for floating point so that the FPImm variants get
// handled early. The parser gets confused otherwise.
def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;

def : NEONMnemonicAlias<"vaddq", "vadd">;
def : NEONMnemonicAlias<"vsubq", "vsub">;

def : NEONMnemonicAlias<"vminq", "vmin">;
def : NEONMnemonicAlias<"vmaxq", "vmax">;

def : NEONMnemonicAlias<"vmulq", "vmul">;

def : NEONMnemonicAlias<"vabsq", "vabs">;

def : NEONMnemonicAlias<"vshlq", "vshl">;
def : NEONMnemonicAlias<"vshrq", "vshr">;

def : NEONMnemonicAlias<"vcvtq", "vcvt">;

def : NEONMnemonicAlias<"vcleq", "vcle">;
def : NEONMnemonicAlias<"vceqq", "vceq">;

def : NEONMnemonicAlias<"vzipq", "vzip">;
def : NEONMnemonicAlias<"vswpq", "vswp">;

def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;
// Alias for loading floating point immediates that aren't representable
// using the vmov.f32 encoding but the bitpattern is representable using
// the .i32 encoding.
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;