1 //===-- ARMInstrMVE.td - MVE support for ARM ---------------*- tablegen -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file describes the ARM MVE instruction set.
11 //===----------------------------------------------------------------------===//
// Operand holding the mask of a VPT block (printed/parsed like an IT mask,
// with MVE-specific encoder/decoder hooks).
def vpt_mask : Operand<i32> {
  let PrintMethod = "printVPTMask";
  let ParserMatchClass = it_mask_asmoperand;
  let EncoderMethod = "getVPTMaskOpValue";
  let DecoderMethod = "DecodeVPTMaskOperand";
}
// VPT/VCMP restricted predicate for sign invariant types
def pred_restricted_i_asmoperand : AsmOperandClass {
  let Name = "CondCodeRestrictedI";
  let RenderMethod = "addITCondCodeOperands";
  let PredicateMethod = "isITCondCodeRestrictedI";
  let ParserMethod = "parseITCondCode";
  let DiagnosticString = "condition code for sign-independent integer "#
                         "comparison must be EQ or NE";
}

// VPT/VCMP restricted predicate for signed types
def pred_restricted_s_asmoperand : AsmOperandClass {
  let Name = "CondCodeRestrictedS";
  let RenderMethod = "addITCondCodeOperands";
  let PredicateMethod = "isITCondCodeRestrictedS";
  let ParserMethod = "parseITCondCode";
  let DiagnosticString = "condition code for signed integer "#
                         "comparison must be EQ, NE, LT, GT, LE or GE";
}

// VPT/VCMP restricted predicate for unsigned types
def pred_restricted_u_asmoperand : AsmOperandClass {
  let Name = "CondCodeRestrictedU";
  let RenderMethod = "addITCondCodeOperands";
  let PredicateMethod = "isITCondCodeRestrictedU";
  let ParserMethod = "parseITCondCode";
  let DiagnosticString = "condition code for unsigned integer "#
                         "comparison must be EQ, NE, HS or HI";
}

// VPT/VCMP restricted predicate for floating point
def pred_restricted_fp_asmoperand : AsmOperandClass {
  let Name = "CondCodeRestrictedFP";
  let RenderMethod = "addITCondCodeOperands";
  let PredicateMethod = "isITCondCodeRestrictedFP";
  let ParserMethod = "parseITCondCode";
  let DiagnosticString = "condition code for floating-point "#
                         "comparison must be EQ, NE, LT, GT, LE or GE";
}
61 class VCMPPredicateOperand : Operand<i32>;
// Sign-invariant integer predicate operand (EQ/NE).
def pred_basic_i : VCMPPredicateOperand {
  let PrintMethod = "printMandatoryRestrictedPredicateOperand";
  let ParserMatchClass = pred_restricted_i_asmoperand;
  let DecoderMethod = "DecodeRestrictedIPredicateOperand";
  let EncoderMethod = "getRestrictedCondCodeOpValue";
}

// Unsigned integer predicate operand (EQ/NE/HS/HI).
def pred_basic_u : VCMPPredicateOperand {
  let PrintMethod = "printMandatoryRestrictedPredicateOperand";
  let ParserMatchClass = pred_restricted_u_asmoperand;
  let DecoderMethod = "DecodeRestrictedUPredicateOperand";
  let EncoderMethod = "getRestrictedCondCodeOpValue";
}

// Signed integer predicate operand (EQ/NE/LT/GT/LE/GE).
def pred_basic_s : VCMPPredicateOperand {
  let PrintMethod = "printMandatoryRestrictedPredicateOperand";
  let ParserMatchClass = pred_restricted_s_asmoperand;
  let DecoderMethod = "DecodeRestrictedSPredicateOperand";
  let EncoderMethod = "getRestrictedCondCodeOpValue";
}

// Floating-point predicate operand (EQ/NE/LT/GT/LE/GE).
def pred_basic_fp : VCMPPredicateOperand {
  let PrintMethod = "printMandatoryRestrictedPredicateOperand";
  let ParserMatchClass = pred_restricted_fp_asmoperand;
  let DecoderMethod = "DecodeRestrictedFPPredicateOperand";
  let EncoderMethod = "getRestrictedCondCodeOpValue";
}
// Register list operands for interleaving load/stores
def VecList2QAsmOperand : AsmOperandClass {
  let Name = "VecListTwoMQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addMVEVecListOperands";
  let DiagnosticString = "operand must be a list of two consecutive "#
                         "q-registers in range [q0,q7]";
}

def VecList2Q : RegisterOperand<MQQPR, "printMVEVectorListTwoQ"> {
  let ParserMatchClass = VecList2QAsmOperand;
  let PrintMethod = "printMVEVectorList<2>";
}

def VecList4QAsmOperand : AsmOperandClass {
  let Name = "VecListFourMQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addMVEVecListOperands";
  let DiagnosticString = "operand must be a list of four consecutive "#
                         "q-registers in range [q0,q7]";
}

def VecList4Q : RegisterOperand<MQQQQPR, "printMVEVectorListFourQ"> {
  let ParserMatchClass = VecList4QAsmOperand;
  let PrintMethod = "printMVEVectorList<4>";
}
// taddrmode_imm7 := reg[r0-r7] +/- (imm7 << shift)
class TMemImm7ShiftOffsetAsmOperand<int shift> : AsmOperandClass {
  let Name = "TMemImm7Shift"#shift#"Offset";
  let PredicateMethod = "isMemImm7ShiftedOffset<"#shift#",ARM::tGPRRegClassID>";
  let RenderMethod = "addMemImmOffsetOperands";
}

class taddrmode_imm7<int shift> : MemOperand,
    ComplexPattern<i32, 2, "SelectTAddrModeImm7<"#shift#">", []> {
  let ParserMatchClass = TMemImm7ShiftOffsetAsmOperand<shift>;
  // They are printed the same way as the T2 imm8 version
  let PrintMethod = "printT2AddrModeImm8Operand<false>";
  // This can also be the same as the T2 version.
  let EncoderMethod = "getT2AddrModeImmOpValue<7,"#shift#">";
  let DecoderMethod = "DecodeTAddrModeImm7<"#shift#">";
  let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
}
// t2addrmode_imm7 := reg +/- (imm7)
class MemImm7ShiftOffsetAsmOperand<int shift> : AsmOperandClass {
  let Name = "MemImm7Shift"#shift#"Offset";
  let PredicateMethod = "isMemImm7ShiftedOffset<" # shift #
                        ",ARM::GPRnopcRegClassID>";
  let RenderMethod = "addMemImmOffsetOperands";
}

// One instance per legal scale factor (byte/half/word accesses).
def MemImm7Shift0OffsetAsmOperand : MemImm7ShiftOffsetAsmOperand<0>;
def MemImm7Shift1OffsetAsmOperand : MemImm7ShiftOffsetAsmOperand<1>;
def MemImm7Shift2OffsetAsmOperand : MemImm7ShiftOffsetAsmOperand<2>;
// Base + scaled-imm7 address mode, selecting the AsmOperand instance
// matching the scale by name.
class T2AddrMode_Imm7<int shift> : MemOperand,
    ComplexPattern<i32, 2, "SelectT2AddrModeImm7<"#shift#">", []> {
  let EncoderMethod = "getT2AddrModeImmOpValue<7,"#shift#">";
  let DecoderMethod = "DecodeT2AddrModeImm7<"#shift#", 0>";
  let ParserMatchClass =
    !cast<AsmOperandClass>("MemImm7Shift"#shift#"OffsetAsmOperand");
  let MIOperandInfo = (ops GPRnopc:$base, i32imm:$offsimm);
}

class t2addrmode_imm7<int shift> : T2AddrMode_Imm7<shift> {
  // They are printed the same way as the imm8 version
  let PrintMethod = "printT2AddrModeImm8Operand<false>";
}
// Writeback variant: base register is rGPR rather than GPRnopc.
class MemImm7ShiftOffsetWBAsmOperand<int shift> : AsmOperandClass {
  let Name = "MemImm7Shift"#shift#"OffsetWB";
  let PredicateMethod = "isMemImm7ShiftedOffset<" # shift #
                        ",ARM::rGPRRegClassID>";
  let RenderMethod = "addMemImmOffsetOperands";
}

def MemImm7Shift0OffsetWBAsmOperand : MemImm7ShiftOffsetWBAsmOperand<0>;
def MemImm7Shift1OffsetWBAsmOperand : MemImm7ShiftOffsetWBAsmOperand<1>;
def MemImm7Shift2OffsetWBAsmOperand : MemImm7ShiftOffsetWBAsmOperand<2>;
// Pre-indexed (writeback) form of t2addrmode_imm7.
class t2addrmode_imm7_pre<int shift> : T2AddrMode_Imm7<shift> {
  // They are printed the same way as the imm8 version
  let PrintMethod = "printT2AddrModeImm8Operand<true>";
  let ParserMatchClass =
    !cast<AsmOperandClass>("MemImm7Shift"#shift#"OffsetWBAsmOperand");
  let DecoderMethod = "DecodeT2AddrModeImm7<"#shift#", 1>";
  // Renamed from $offsim for consistency with T2AddrMode_Imm7's $offsimm.
  let MIOperandInfo = (ops rGPR:$base, i32imm:$offsimm);
}
// Bare scaled-imm7 offset operand (used for post-indexed forms), one
// AsmOperand instance per scale factor.
class t2am_imm7shiftOffsetAsmOperand<int shift>
  : AsmOperandClass { let Name = "Imm7Shift"#shift; }
def t2am_imm7shift0OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<0>;
def t2am_imm7shift1OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<1>;
def t2am_imm7shift2OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<2>;
// Standalone scaled-imm7 offset (e.g. post-indexed addressing).
class t2am_imm7_offset<int shift> : MemOperand,
    ComplexPattern<i32, 1, "SelectT2AddrModeImm7Offset<"#shift#">",
                   [], [SDNPWantRoot]> {
  // They are printed the same way as the imm8 version
  let PrintMethod = "printT2AddrModeImm8OffsetOperand";
  let ParserMatchClass =
    !cast<AsmOperandClass>("t2am_imm7shift"#shift#"OffsetAsmOperand");
  let EncoderMethod = "getT2ScaledImmOpValue<7,"#shift#">";
  let DecoderMethod = "DecodeT2Imm7<"#shift#">";
}
// Operands for gather/scatter loads of the form [Rbase, Qoffsets]
class MemRegRQOffsetAsmOperand<int shift> : AsmOperandClass {
  let Name = "MemRegRQS"#shift#"Offset";
  let PredicateMethod = "isMemRegRQOffset<"#shift#">";
  let RenderMethod = "addMemRegRQOffsetOperands";
}

def MemRegRQS0OffsetAsmOperand : MemRegRQOffsetAsmOperand<0>;
def MemRegRQS1OffsetAsmOperand : MemRegRQOffsetAsmOperand<1>;
def MemRegRQS2OffsetAsmOperand : MemRegRQOffsetAsmOperand<2>;
def MemRegRQS3OffsetAsmOperand : MemRegRQOffsetAsmOperand<3>;
// mve_addr_rq_shift := reg + vreg{ << UXTW #shift}
class mve_addr_rq_shift<int shift> : MemOperand {
  let EncoderMethod = "getMveAddrModeRQOpValue";
  let PrintMethod = "printMveAddrModeRQOperand<"#shift#">";
  let ParserMatchClass =
    !cast<AsmOperandClass>("MemRegRQS"#shift#"OffsetAsmOperand");
  let DecoderMethod = "DecodeMveAddrModeRQ";
  let MIOperandInfo = (ops GPRnopc:$base, MQPR:$offsreg);
}
// Vector base + immediate offset operand for gather/scatter
// (only word/doubleword scales exist).
class MemRegQOffsetAsmOperand<int shift> : AsmOperandClass {
  let Name = "MemRegQS"#shift#"Offset";
  let PredicateMethod = "isMemRegQOffset<"#shift#">";
  let RenderMethod = "addMemImmOffsetOperands";
}

def MemRegQS2OffsetAsmOperand : MemRegQOffsetAsmOperand<2>;
def MemRegQS3OffsetAsmOperand : MemRegQOffsetAsmOperand<3>;
// mve_addr_q_shift := vreg {+ #imm7s2/4}
class mve_addr_q_shift<int shift> : MemOperand {
  let EncoderMethod = "getMveAddrModeQOpValue<"#shift#">";
  // Can be printed same way as other reg + imm operands
  let PrintMethod = "printT2AddrModeImm8Operand<false>";
  let ParserMatchClass =
    !cast<AsmOperandClass>("MemRegQS"#shift#"OffsetAsmOperand");
  let DecoderMethod = "DecodeMveAddrModeQ<"#shift#">";
  let MIOperandInfo = (ops MQPR:$base, i32imm:$imm);
}
// A family of classes wrapping up information about the vector types
// used by MVE.
class MVEVectorVTInfo<ValueType vec, ValueType dblvec,
                      ValueType pred, ValueType dblpred,
                      bits<2> size, string suffixletter, bit unsigned> {
  // The LLVM ValueType representing the vector, so we can use it in
  // ISel patterns.  (Referenced below as VTI.Vec.)
  ValueType Vec = vec;

  // The LLVM ValueType representing a vector with elements double the size
  // of those in Vec, so we can use it in ISel patterns. It is up to the
  // invoker of this class to ensure that this is a correct choice.
  ValueType DblVec = dblvec;

  // An LLVM ValueType representing a corresponding vector of
  // predicate bits, for use in ISel patterns that handle an IR
  // intrinsic describing the predicated form of the instruction.
  //
  // Usually, for a vector of N things, this will be vNi1. But for
  // vectors of 2 values, we make an exception, and use v4i1 instead
  // of v2i1. Rationale: MVE codegen doesn't support doing all the
  // auxiliary operations on v2i1 (vector shuffles etc), and also,
  // there's no MVE compare instruction that will _generate_ v2i1
  // directly.
  ValueType Pred = pred;

  // Same as Pred but for DblVec rather than Vec.
  ValueType DblPred = dblpred;

  // The most common representation of the vector element size in MVE
  // instruction encodings: a 2-bit value V representing an (8<<V)-bit
  // vector element.  (Referenced below as VTI.Size.)
  bits<2> Size = size;

  // For vectors explicitly mentioning a signedness of integers: 0 for
  // signed and 1 for unsigned. For anything else, undefined.
  bit Unsigned = unsigned;

  // The number of bits in a vector element, in integer form.
  int LaneBits = !shl(8, Size);

  // The suffix used in assembly language on an instruction operating
  // on this lane if it only cares about number of bits.
  string BitsSuffix = !if(!eq(suffixletter, "p"),
                          !if(!eq(unsigned, 0b0), "8", "16"),
                          !cast<string>(LaneBits));

  // The suffix used on an instruction that mentions the whole type.
  string Suffix = suffixletter # BitsSuffix;

  // The letter part of the suffix only.
  string SuffixLetter = suffixletter;
}
// Integer vector types that don't treat signed and unsigned differently.
// Unsigned is left undefined ('?') because the "i" suffix carries no sign.
def MVE_v16i8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b00, "i", ?>;
def MVE_v8i16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b01, "i", ?>;
def MVE_v4i32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, v4i1, 0b10, "i", ?>;
def MVE_v2i64 : MVEVectorVTInfo<v2i64, ?, v4i1, ?, 0b11, "i", ?>;

// Explicitly signed and unsigned integer vectors. They map to the
// same set of LLVM ValueTypes as above, but are represented
// differently in assembly and instruction encodings.
def MVE_v16s8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b00, "s", 0b0>;
def MVE_v8s16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b01, "s", 0b0>;
def MVE_v4s32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, v4i1, 0b10, "s", 0b0>;
def MVE_v2s64 : MVEVectorVTInfo<v2i64, ?, v4i1, ?, 0b11, "s", 0b0>;
def MVE_v16u8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b00, "u", 0b1>;
def MVE_v8u16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b01, "u", 0b1>;
def MVE_v4u32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, v4i1, 0b10, "u", 0b1>;
def MVE_v2u64 : MVEVectorVTInfo<v2i64, ?, v4i1, ?, 0b11, "u", 0b1>;

// Floating-point vector types.
def MVE_v8f16 : MVEVectorVTInfo<v8f16, v4f32, v8i1, v4i1, 0b01, "f", ?>;
def MVE_v4f32 : MVEVectorVTInfo<v4f32, v2f64, v4i1, v4i1, 0b10, "f", ?>;
def MVE_v2f64 : MVEVectorVTInfo<v2f64, ?, v4i1, ?, 0b11, "f", ?>;

// Polynomial vector types.  Here Unsigned distinguishes the 8-bit and
// 16-bit polynomial suffixes rather than signedness (see BitsSuffix).
def MVE_v16p8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b11, "p", 0b0>;
def MVE_v8p16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b11, "p", 0b1>;
// Patterns for a two-operand vector instruction: unpredicated, predicated
// via vselect (with an optional identity-vector fold), and predicated via
// the corresponding IR intrinsic.
multiclass MVE_TwoOpPattern<MVEVectorVTInfo VTI, SDPatternOperator Op, Intrinsic PredInt,
                            dag PredOperands, Instruction Inst,
                            SDPatternOperator IdentityVec = null_frag> {
  // Unpredicated
  def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
            (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;

  // Predicated with select
  if !ne(VTI.Size, 0b11) then {
    def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$mask),
                                (VTI.Vec (Op (VTI.Vec MQPR:$Qm),
                                             (VTI.Vec MQPR:$Qn))),
                                (VTI.Vec MQPR:$inactive))),
              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
                             ARMVCCThen, (VTI.Pred VCCR:$mask),
                             (VTI.Vec MQPR:$inactive)))>;

    // Optionally with the select folded through the op
    def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm),
                           (VTI.Vec (vselect (VTI.Pred VCCR:$mask),
                                             (VTI.Vec MQPR:$Qn),
                                             (VTI.Vec IdentityVec))))),
              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
                             ARMVCCThen, (VTI.Pred VCCR:$mask),
                             (VTI.Vec MQPR:$Qm)))>;
  }

  // Predicated with intrinsic
  def : Pat<(VTI.Vec !con((PredInt (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)),
                          PredOperands,
                          (? (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive)))),
            (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
                           ARMVCCThen, (VTI.Pred VCCR:$mask),
                           (VTI.Vec MQPR:$inactive)))>;
}
// As MVE_TwoOpPattern, but for the vector/scalar form where the second
// operand is a duplicated GPR (ARMvdup).
multiclass MVE_TwoOpPatternDup<MVEVectorVTInfo VTI, SDPatternOperator Op, Intrinsic PredInt,
                               dag PredOperands, Instruction Inst,
                               SDPatternOperator IdentityVec = null_frag> {
  // Unpredicated
  def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm), (VTI.Vec (ARMvdup rGPR:$Rn)))),
            (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn))>;

  // Predicated with select
  if !ne(VTI.Size, 0b11) then {
    def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$mask),
                                (VTI.Vec (Op (VTI.Vec MQPR:$Qm),
                                             (VTI.Vec (ARMvdup rGPR:$Rn)))),
                                (VTI.Vec MQPR:$inactive))),
              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn,
                             ARMVCCThen, (VTI.Pred VCCR:$mask),
                             (VTI.Vec MQPR:$inactive)))>;

    // Optionally with the select folded through the op
    def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm),
                           (VTI.Vec (vselect (VTI.Pred VCCR:$mask),
                                             (VTI.Vec (ARMvdup rGPR:$Rn)),
                                             (VTI.Vec IdentityVec))))),
              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn,
                             ARMVCCThen, (VTI.Pred VCCR:$mask),
                             (VTI.Vec MQPR:$Qm)))>;
  }

  // Predicated with intrinsic
  def : Pat<(VTI.Vec !con((PredInt (VTI.Vec MQPR:$Qm), (VTI.Vec (ARMvdup rGPR:$Rn))),
                          PredOperands,
                          (? (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive)))),
            (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn,
                           ARMVCCThen, (VTI.Pred VCCR:$mask),
                           (VTI.Vec MQPR:$inactive)))>;
}
393 // --------- Start of base classes for the instructions themselves
// Base class of all 32-bit MVE instructions.
class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm,
             string ops, string cstr, list<dag> pattern>
  : Thumb2XI<oops, iops, AddrModeNone, 4, itin, !strconcat(asm, "\t", ops), cstr,
             pattern>,
    Requires<[HasMVEInt]> {
  let D = MVEDomain;
  let DecoderNamespace = "MVE";
}
// MVE_p is used for most predicated instructions, to add the cluster
// of input operands that provides the VPT suffix (none, T or E) and
// the input predicate register.
class MVE_p<dag oops, dag iops, InstrItinClass itin, string iname,
            string suffix, string ops, vpred_ops vpred, string cstr,
            list<dag> pattern=[]>
  : MVE_MI<oops, !con(iops, (ins vpred:$vp)), itin,
           // If the instruction has a suffix, like vadd.f32, then the
           // VPT predication suffix goes before the dot, so the full
           // name has to be "vadd${vp}.f32".
           !strconcat(iname, "${vp}",
                      !if(!eq(suffix, ""), "", !strconcat(".", suffix))),
           ops, !strconcat(cstr, vpred.vpred_constraint), pattern> {
  let Inst{31-29} = 0b111;
  let Inst{27-26} = 0b11;
}
// Floating-point variant of MVE_p: identical, but requires the MVE FP
// extension rather than just integer MVE.
class MVE_f<dag oops, dag iops, InstrItinClass itin, string iname,
            string suffix, string ops, vpred_ops vpred, string cstr,
            list<dag> pattern=[]>
  : MVE_p<oops, iops, itin, iname, suffix, ops, vpred, cstr, pattern> {
  let Predicates = [HasMVEFloat];
}
// MVE instruction that takes an ordinary Thumb2 predicate (Thumb2I)
// rather than a VPT predicate.
class MVE_MI_with_pred<dag oops, dag iops, InstrItinClass itin, string asm,
                       string ops, string cstr, list<dag> pattern>
  : Thumb2I<oops, iops, AddrModeNone, 4, itin, asm, !strconcat("\t", ops), cstr,
            pattern>,
    Requires<[HasV8_1MMainline, HasMVEInt]> {
  let D = MVEDomain;
  let DecoderNamespace = "MVE";
}
// Base class for VMOV-to/from-lane instructions; builds the mnemonic
// with an optional ".suffix" before the operand list.
class MVE_VMOV_lane_base<dag oops, dag iops, InstrItinClass itin, string asm,
                         string suffix, string ops, string cstr,
                         list<dag> pattern>
  : Thumb2I<oops, iops, AddrModeNone, 4, itin, asm,
            !if(!eq(suffix, ""), "", "." # suffix) # "\t" # ops,
            cstr, pattern>,
    Requires<[HasV8_1MMainline, HasMVEInt]> {
  let D = MVEDomain;
  let DecoderNamespace = "MVE";
}
// Common base of the MVE scalar shift instructions.
class MVE_ScalarShift<string iname, dag oops, dag iops, string asm, string cstr,
                      list<dag> pattern=[]>
  : MVE_MI_with_pred<oops, iops, NoItinerary, iname, asm, cstr, pattern> {
  let Inst{31-20} = 0b111010100101;
  let Inst{8} = 0b1;

  let validForTailPredication=1;
}
// Scalar shift writing a single GPR destination ($RdaDest, encoded in
// bits 19-16).
class MVE_ScalarShiftSingleReg<string iname, dag iops, string asm, string cstr,
                               list<dag> pattern=[]>
  : MVE_ScalarShift<iname, (outs rGPR:$RdaDest), iops, asm, cstr, pattern> {
  bits<4> RdaDest;

  let Inst{19-16} = RdaDest{3-0};
}
// Single-register scalar shift by immediate, matched from the
// corresponding int_arm_mve_* intrinsic (looked up by mnemonic).
class MVE_ScalarShiftSRegImm<string iname, bits<2> op5_4>
  : MVE_ScalarShiftSingleReg<iname, (ins rGPR:$RdaSrc, long_shift:$imm),
                             "$RdaSrc, $imm", "$RdaDest = $RdaSrc",
                             [(set rGPR:$RdaDest,
                                   (i32 (!cast<Intrinsic>("int_arm_mve_" # iname)
                                             (i32 rGPR:$RdaSrc), (i32 imm:$imm))))]> {
  bits<5> imm;

  let Inst{15} = 0b0;
  let Inst{14-12} = imm{4-2};
  let Inst{11-8} = 0b1111;
  let Inst{7-6} = imm{1-0};
  let Inst{5-4} = op5_4{1-0};
  let Inst{3-0} = 0b1111;
}
// Saturating/rounding scalar shifts by immediate; op5_4 selects the
// operation.
def MVE_SQSHL : MVE_ScalarShiftSRegImm<"sqshl", 0b11>;
def MVE_SRSHR : MVE_ScalarShiftSRegImm<"srshr", 0b10>;
def MVE_UQSHL : MVE_ScalarShiftSRegImm<"uqshl", 0b00>;
def MVE_URSHR : MVE_ScalarShiftSRegImm<"urshr", 0b01>;
// Single-register scalar shift by register amount ($Rm), matched from
// the corresponding int_arm_mve_* intrinsic.
class MVE_ScalarShiftSRegReg<string iname, bits<2> op5_4>
  : MVE_ScalarShiftSingleReg<iname, (ins rGPR:$RdaSrc, rGPR:$Rm),
                             "$RdaSrc, $Rm", "$RdaDest = $RdaSrc",
                             [(set rGPR:$RdaDest,
                                   (i32 (!cast<Intrinsic>("int_arm_mve_" # iname)
                                             (i32 rGPR:$RdaSrc), (i32 rGPR:$Rm))))]> {
  bits<4> Rm;

  let Inst{15-12} = Rm{3-0};
  let Inst{11-8} = 0b1111;
  let Inst{7-6} = 0b00;
  let Inst{5-4} = op5_4{1-0};
  let Inst{3-0} = 0b1101;

  let Unpredictable{8-6} = 0b111;
}
// Saturating-rounding scalar shifts by register.
def MVE_SQRSHR : MVE_ScalarShiftSRegReg<"sqrshr", 0b10>;
def MVE_UQRSHL : MVE_ScalarShiftSRegReg<"uqrshl", 0b00>;
// Scalar shift on a 64-bit value held in an even/odd GPR pair
// ($RdaLo/$RdaHi); only the top three bits of each register number are
// encoded, since the even/odd split fixes the low bit.
class MVE_ScalarShiftDoubleReg<string iname, dag iops, string asm,
                               string cstr, list<dag> pattern=[]>
  : MVE_ScalarShift<iname, (outs tGPREven:$RdaLo, tGPROdd:$RdaHi),
                    iops, asm, cstr, pattern> {
  bits<4> RdaLo;
  bits<4> RdaHi;

  let Inst{19-17} = RdaLo{3-1};
  let Inst{11-9} = RdaHi{3-1};

  let hasSideEffects = 0;
}
// Double-register scalar shift by immediate.
class MVE_ScalarShiftDRegImm<string iname, bits<2> op5_4, bit op16,
                             list<dag> pattern=[]>
  : MVE_ScalarShiftDoubleReg<
      iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, long_shift:$imm),
      "$RdaLo, $RdaHi, $imm", "$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src",
      pattern> {
  bits<5> imm;

  let Inst{16} = op16;
  let Inst{15} = 0b0;
  let Inst{14-12} = imm{4-2};
  let Inst{7-6} = imm{1-0};
  let Inst{5-4} = op5_4{1-0};
  let Inst{3-0} = 0b1111;
}
// Double-register scalar shift by register; earlyclobber because the
// shift amount register must not overlap the destination pair.
class MVE_ScalarShiftDRegRegBase<string iname, dag iops, string asm,
                                 bit op5, bit op16, list<dag> pattern=[]>
  : MVE_ScalarShiftDoubleReg<
      iname, iops, asm, "@earlyclobber $RdaHi,@earlyclobber $RdaLo,"
                        "$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src",
      pattern> {
  bits<4> Rm;

  let Inst{16} = op16;
  let Inst{15-12} = Rm{3-0};
  let Inst{7-6} = 0b00;
  let Inst{5} = op5;
  let Inst{4} = 0b0;
  let Inst{3-0} = 0b1101;

  // Custom decoder method because of the following overlapping encodings:
  //
  // SQRSHRL and SQRSHR
  // UQRSHLL and UQRSHL
  let DecoderMethod = "DecodeMVEOverlappingLongShift";
}
// Plain (non-saturating) double-register shift by register.
class MVE_ScalarShiftDRegReg<string iname, bit op5, list<dag> pattern=[]>
  : MVE_ScalarShiftDRegRegBase<
      iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm),
      "$RdaLo, $RdaHi, $Rm", op5, 0b0, pattern> {

  let Inst{11-8} = 0b1111;
}
// Saturating double-register shift by register; takes an extra
// saturation-width operand ($sat), encoded in bit 8.
class MVE_ScalarShiftDRegRegWithSat<string iname, bit op5, list<dag> pattern=[]>
  : MVE_ScalarShiftDRegRegBase<
      iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm, saturateop:$sat),
      "$RdaLo, $RdaHi, $sat, $Rm", op5, 0b1, pattern> {
  bit sat;

  let Inst{11-9} = 0b111;
  let Inst{8} = sat;
}
// 64-bit (GPR-pair) shifts. The ASRL/LSLL/LSRL forms are selected from
// the corresponding ARMISD long-shift nodes; the saturating/rounding
// forms are only created from intrinsics.
def MVE_ASRLr   : MVE_ScalarShiftDRegReg<"asrl",    0b1,   [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
                                                                (ARMasrl tGPREven:$RdaLo_src,
                                                                tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
def MVE_ASRLi   : MVE_ScalarShiftDRegImm<"asrl",    0b10, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
                                                                  (ARMasrl tGPREven:$RdaLo_src,
                                                                  tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>;
def MVE_LSLLr   : MVE_ScalarShiftDRegReg<"lsll",    0b0,   [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
                                                                (ARMlsll tGPREven:$RdaLo_src,
                                                                tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
def MVE_LSLLi   : MVE_ScalarShiftDRegImm<"lsll",    0b00, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
                                                                  (ARMlsll tGPREven:$RdaLo_src,
                                                                  tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>;
def MVE_LSRL    : MVE_ScalarShiftDRegImm<"lsrl",    0b01, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
                                                                  (ARMlsrl tGPREven:$RdaLo_src,
                                                                  tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>;

def MVE_SQRSHRL : MVE_ScalarShiftDRegRegWithSat<"sqrshrl", 0b1>;
def MVE_SQSHLL  : MVE_ScalarShiftDRegImm<"sqshll", 0b11, 0b1>;
def MVE_SRSHRL  : MVE_ScalarShiftDRegImm<"srshrl", 0b10, 0b1>;

def MVE_UQRSHLL : MVE_ScalarShiftDRegRegWithSat<"uqrshll", 0b0>;
def MVE_UQSHLL  : MVE_ScalarShiftDRegImm<"uqshll", 0b00, 0b1>;
def MVE_URSHRL  : MVE_ScalarShiftDRegImm<"urshrl", 0b01, 0b1>;
598 // start of mve_rDest instructions
// Base class for MVE instructions whose destination is a GPR.
class MVE_rDest<dag oops, dag iops, InstrItinClass itin,
                string iname, string suffix,
                string ops, string cstr, list<dag> pattern=[]>
  // Always use vpred_n and not vpred_r: with the output register being
  // a GPR and not a vector register, there can't be any question of
  // what to put in its inactive lanes.
  : MVE_p<oops, iops, itin, iname, suffix, ops, vpred_n, cstr, pattern> {

  let Inst{25-23} = 0b101;
  let Inst{11-9} = 0b111;
}
// VABAV: absolute-difference-and-accumulate across vector into a GPR
// ($Rda), accumulating onto $Rda_src.
// NOTE(review): this excerpt appears truncated — the register bits<N>
// declarations, several `let Inst{...}` encoding lines (including the U
// bit) and the closing brace are missing here; restore from upstream
// before building.
class MVE_VABAV<string suffix, bit U, bits<2> size>
  : MVE_rDest<(outs rGPR:$Rda), (ins rGPR:$Rda_src, MQPR:$Qn, MQPR:$Qm),
              NoItinerary, "vabav", suffix, "$Rda, $Qn, $Qm", "$Rda = $Rda_src",
  // Element size in bits 21-20; register fields as shown below.
  let Inst{21-20} = size{1-0};
  let Inst{19-17} = Qn{2-0};
  let Inst{15-12} = Rda{3-0};
  let Inst{3-1} = Qm{2-0};
  // Cross-lane reduction: result depends on all active lanes.
  let horizontalReduction = 1;
// Instantiates a VABAV instruction for one vector type and maps the
// unpredicated and predicated intrinsics onto it.
multiclass MVE_VABAV_m<MVEVectorVTInfo VTI> {
  def "" : MVE_VABAV<VTI.Suffix, VTI.Unsigned, VTI.Size>;
  defvar Inst = !cast<Instruction>(NAME);

  let Predicates = [HasMVEInt] in {
    def : Pat<(i32 (int_arm_mve_vabav
                        (i32 VTI.Unsigned),
                        (i32 rGPR:$Rda_src),
                        (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
              (i32 (Inst (i32 rGPR:$Rda_src),
                         (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;

    def : Pat<(i32 (int_arm_mve_vabav_predicated
                        (i32 VTI.Unsigned),
                        (i32 rGPR:$Rda_src),
                        (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
                        (VTI.Pred VCCR:$mask))),
              (i32 (Inst (i32 rGPR:$Rda_src),
                         (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
                         ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
  }
}
// VABAV for all signed and unsigned element widths.
defm MVE_VABAVs8  : MVE_VABAV_m<MVE_v16s8>;
defm MVE_VABAVs16 : MVE_VABAV_m<MVE_v8s16>;
defm MVE_VABAVs32 : MVE_VABAV_m<MVE_v4s32>;
defm MVE_VABAVu8  : MVE_VABAV_m<MVE_v16u8>;
defm MVE_VABAVu16 : MVE_VABAV_m<MVE_v8u16>;
defm MVE_VABAVu32 : MVE_VABAV_m<MVE_v4u32>;
// VADDV/VADDVA: add across vector into a 32-bit GPR (even register),
// A selects the accumulating form and U the unsigned form.
// NOTE(review): this excerpt appears truncated — the register bits<N>
// declarations, the `let Inst{...}` lines using A and U, and the closing
// brace are missing here; restore from upstream before building.
class MVE_VADDV<string iname, string suffix, dag iops, string cstr,
                bit A, bit U, bits<2> size, list<dag> pattern=[]>
  : MVE_rDest<(outs tGPREven:$Rda), iops, NoItinerary,
              iname, suffix, "$Rda, $Qm", cstr, pattern> {
  let Inst{22-20} = 0b111;
  let Inst{19-18} = size{1-0};
  let Inst{17-16} = 0b01;
  let Inst{15-13} = Rda{3-1};
  let Inst{8-6} = 0b100;
  let Inst{3-1} = Qm{2-0};
  // Cross-lane reduction, but still safe inside tail-predicated loops.
  let horizontalReduction = 1;
  let validForTailPredication = 1;
// Type profile for predicated vector reductions: i32 result from a
// vector and a predicate vector.
def SDTVecReduceP : SDTypeProfile<1, 2, [ // VADDLVp
  SDTCisInt<0>, SDTCisVec<1>, SDTCisVec<2>
]>;
def ARMVADDVs : SDNode<"ARMISD::VADDVs", SDTVecReduce>;
def ARMVADDVu : SDNode<"ARMISD::VADDVu", SDTVecReduce>;
def ARMVADDVps : SDNode<"ARMISD::VADDVps", SDTVecReduceP>;
def ARMVADDVpu : SDNode<"ARMISD::VADDVpu", SDTVecReduceP>;
// Instantiates VADDV (no_acc) and VADDVA (acc) for one type and maps
// vecreduce_add, the ARMVADDV* nodes and the addv intrinsics onto them.
// NOTE(review): this excerpt appears truncated — the no_acc operand list,
// several pattern result lines, the `} else {` arm for the signed case
// and the closing braces are missing; restore from upstream.
multiclass MVE_VADDV_A<MVEVectorVTInfo VTI> {
  // Accumulating form: adds the reduction into $Rda_src.
  def acc : MVE_VADDV<"vaddva", VTI.Suffix,
                      (ins tGPREven:$Rda_src, MQPR:$Qm), "$Rda = $Rda_src",
                      0b1, VTI.Unsigned, VTI.Size>;
  // Plain reduction form.
  def no_acc : MVE_VADDV<"vaddv", VTI.Suffix,
                         0b0, VTI.Unsigned, VTI.Size>;

  defvar InstA = !cast<Instruction>(NAME # "acc");
  defvar InstN = !cast<Instruction>(NAME # "no_acc");

  let Predicates = [HasMVEInt] in {
    if VTI.Unsigned then {
      // Unsigned types also match the generic vecreduce_add node
      // (sign makes no difference to a same-width add reduction).
      def : Pat<(i32 (vecreduce_add (VTI.Vec MQPR:$vec))),
      def : Pat<(i32 (vecreduce_add (VTI.Vec (vselect (VTI.Pred VCCR:$pred),
                                                      (VTI.Vec ARMimmAllZerosV))))),
                (i32 (InstN $vec, ARMVCCThen, $pred))>;
      def : Pat<(i32 (ARMVADDVu (VTI.Vec MQPR:$vec))),
      def : Pat<(i32 (ARMVADDVpu (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
                (i32 (InstN $vec, ARMVCCThen, $pred))>;
      // Reduction followed by an add folds into the accumulating form.
      def : Pat<(i32 (add (i32 (vecreduce_add (VTI.Vec MQPR:$vec))),
                          (i32 tGPREven:$acc))),
                (i32 (InstA $acc, $vec))>;
      def : Pat<(i32 (add (i32 (vecreduce_add (VTI.Vec (vselect (VTI.Pred VCCR:$pred),
                                                      (VTI.Vec ARMimmAllZerosV))))),
                          (i32 tGPREven:$acc))),
                (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
      def : Pat<(i32 (add (i32 (ARMVADDVu (VTI.Vec MQPR:$vec))),
                          (i32 tGPREven:$acc))),
                (i32 (InstA $acc, $vec))>;
      def : Pat<(i32 (add (i32 (ARMVADDVpu (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
                          (i32 tGPREven:$acc))),
                (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
      // Signed-node patterns (ARMVADDVs/ps) for the signed instantiation.
      def : Pat<(i32 (ARMVADDVs (VTI.Vec MQPR:$vec))),
      def : Pat<(i32 (add (i32 (ARMVADDVs (VTI.Vec MQPR:$vec))),
                          (i32 tGPREven:$acc))),
                (i32 (InstA $acc, $vec))>;
      def : Pat<(i32 (ARMVADDVps (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
                (i32 (InstN $vec, ARMVCCThen, $pred))>;
      def : Pat<(i32 (add (i32 (ARMVADDVps (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
                          (i32 tGPREven:$acc))),
                (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
    // Predicated intrinsic forms, valid for both signednesses.
    def : Pat<(i32 (int_arm_mve_addv_predicated (VTI.Vec MQPR:$vec),
                                                (VTI.Pred VCCR:$pred))),
              (i32 (InstN $vec, ARMVCCThen, $pred))>;
    def : Pat<(i32 (add (int_arm_mve_addv_predicated (VTI.Vec MQPR:$vec),
                                                     (VTI.Pred VCCR:$pred)),
                        (i32 tGPREven:$acc))),
              (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
// VADDV/VADDVA for all signed and unsigned element widths.
defm MVE_VADDVs8  : MVE_VADDV_A<MVE_v16s8>;
defm MVE_VADDVs16 : MVE_VADDV_A<MVE_v8s16>;
defm MVE_VADDVs32 : MVE_VADDV_A<MVE_v4s32>;
defm MVE_VADDVu8  : MVE_VADDV_A<MVE_v16u8>;
defm MVE_VADDVu16 : MVE_VADDV_A<MVE_v8u16>;
defm MVE_VADDVu32 : MVE_VADDV_A<MVE_v4u32>;
// VADDLV/VADDLVA: add across vector into a 64-bit result split over an
// even/odd GPR pair; A selects the accumulating form, U the unsigned one.
// NOTE(review): this excerpt appears truncated — the register bits<N>
// declarations, the `let Inst{...}` lines using A and U, and the closing
// brace are missing here; restore from upstream before building.
class MVE_VADDLV<string iname, string suffix, dag iops, string cstr,
                 bit A, bit U, list<dag> pattern=[]>
  : MVE_rDest<(outs tGPREven:$RdaLo, tGPROdd:$RdaHi), iops, NoItinerary, iname,
              suffix, "$RdaLo, $RdaHi, $Qm", cstr, pattern> {
  let Inst{22-20} = RdaHi{3-1};
  let Inst{19-18} = 0b10;
  let Inst{17-16} = 0b01;
  let Inst{15-13} = RdaLo{3-1};
  let Inst{8-6} = 0b100;
  let Inst{3-1} = Qm{2-0};
  // Cross-lane reduction: result depends on all active lanes.
  let horizontalReduction = 1;
// Type profiles for the 64-bit (two-result) VADDLV family.
def SDTVecReduceL : SDTypeProfile<2, 1, [ // VADDLV
  SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>
]>;
def SDTVecReduceLA : SDTypeProfile<2, 3, [ // VADDLVA
  SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>,
  SDTCisVec<4>
]>;
// Operands are the vector (index 2) and the predicate (index 3).
def SDTVecReduceLP : SDTypeProfile<2, 2, [ // VADDLVp
  SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>, SDTCisVec<3>
]>;
def SDTVecReduceLPA : SDTypeProfile<2, 4, [ // VADDLVAp
  SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>,
  SDTCisVec<4>, SDTCisVec<5>
]>;
// Instantiates VADDLV (no_acc) and VADDLVA (acc) for one type, creating
// the signedness-specific ARMISD nodes by name, and maps them onto the
// instructions.
multiclass MVE_VADDLV_A<MVEVectorVTInfo VTI> {
  def acc : MVE_VADDLV<"vaddlva", VTI.Suffix,
                       (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, MQPR:$Qm),
                       "$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src",
                       0b1, VTI.Unsigned>;
  def no_acc : MVE_VADDLV<"vaddlv", VTI.Suffix,
                          (ins MQPR:$Qm), "",
                          0b0, VTI.Unsigned>;

  defvar InstA = !cast<Instruction>(NAME # "acc");
  defvar InstN = !cast<Instruction>(NAME # "no_acc");

  defvar letter = VTI.SuffixLetter;
  defvar ARMVADDLV = SDNode<"ARMISD::VADDLV" # letter, SDTVecReduceL>;
  defvar ARMVADDLVA = SDNode<"ARMISD::VADDLVA" # letter, SDTVecReduceLA>;
  defvar ARMVADDLVp = SDNode<"ARMISD::VADDLVp" # letter, SDTVecReduceLP>;
  defvar ARMVADDLVAp = SDNode<"ARMISD::VADDLVAp" # letter, SDTVecReduceLPA>;

  let Predicates = [HasMVEInt] in {
    def : Pat<(ARMVADDLV (v4i32 MQPR:$vec)),
              (InstN (v4i32 MQPR:$vec))>;
    def : Pat<(ARMVADDLVA tGPREven:$acclo, tGPROdd:$acchi, (v4i32 MQPR:$vec)),
              (InstA tGPREven:$acclo, tGPROdd:$acchi, (v4i32 MQPR:$vec))>;
    def : Pat<(ARMVADDLVp (v4i32 MQPR:$vec), (VTI.Pred VCCR:$pred)),
              (InstN (v4i32 MQPR:$vec), ARMVCCThen, (VTI.Pred VCCR:$pred))>;
    def : Pat<(ARMVADDLVAp tGPREven:$acclo, tGPROdd:$acchi, (v4i32 MQPR:$vec),
                           (VTI.Pred VCCR:$pred)),
              (InstA tGPREven:$acclo, tGPROdd:$acchi, (v4i32 MQPR:$vec),
                     ARMVCCThen, (VTI.Pred VCCR:$pred))>;
  }
}
// VADDLV only exists for 32-bit elements.
defm MVE_VADDLVs32 : MVE_VADDLV_A<MVE_v4s32>;
defm MVE_VADDLVu32 : MVE_VADDLV_A<MVE_v4u32>;
// VMINNMV/VMAXNMV (and the absolute-value VMINNMAV/VMAXNMAV) base class:
// floating-point min/max across a vector combined with a scalar in a GPR.
// NOTE(review): this excerpt appears truncated — the register bits<N>
// declarations, the encoding lines using sz/bit_7, and the closing brace
// are missing here; restore from upstream before building.
class MVE_VMINMAXNMV<string iname, string suffix, bit sz,
                     bit bit_17, bit bit_7, list<dag> pattern=[]>
  : MVE_rDest<(outs rGPR:$RdaDest), (ins rGPR:$RdaSrc, MQPR:$Qm),
              NoItinerary, iname, suffix, "$RdaSrc, $Qm",
              "$RdaDest = $RdaSrc", pattern> {
  let Inst{22-20} = 0b110;
  let Inst{19-18} = 0b11;
  let Inst{17} = bit_17;
  let Inst{15-12} = RdaDest{3-0};
  let Inst{6-5} = 0b00;
  let Inst{3-1} = Qm{2-0};
  // Cross-lane reduction; FP, so requires the MVE FP extension.
  let horizontalReduction = 1;
  let Predicates = [HasMVEFloat];
  let hasSideEffects = 0;
// Instantiates one FP min/max-across-vector instruction and maps its
// unpredicated and predicated intrinsics onto it; the scalar travels in
// an FP register class (SPR/HPR), so values are moved via COPY_TO_REGCLASS.
// NOTE(review): this excerpt appears truncated — some pattern argument
// and result lines, the _fty parameter list tail and closing braces are
// missing here; restore from upstream before building.
multiclass MVE_VMINMAXNMV_p<string iname, bit notAbs, bit isMin,
                            MVEVectorVTInfo VTI, string intrBaseName,
                            ValueType Scalar, RegisterClass ScalarReg> {
  def "": MVE_VMINMAXNMV<iname, VTI.Suffix, VTI.Size{0}, notAbs, isMin>;
  defvar Inst = !cast<Instruction>(NAME);
  defvar unpred_intr = !cast<Intrinsic>(intrBaseName);
  defvar pred_intr = !cast<Intrinsic>(intrBaseName#"_predicated");
  let Predicates = [HasMVEFloat] in {
    def : Pat<(Scalar (unpred_intr (Scalar ScalarReg:$prev),
                                   (VTI.Vec MQPR:$vec))),
              (COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS ScalarReg:$prev, rGPR),
                                      (VTI.Vec MQPR:$vec)),
    def : Pat<(Scalar (pred_intr (Scalar ScalarReg:$prev),
                                 (VTI.Pred VCCR:$pred))),
              (COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS ScalarReg:$prev, rGPR),
                                      ARMVCCThen, (VTI.Pred VCCR:$pred)),
// Instantiates the f32 and f16 variants of one mnemonic.
multiclass MVE_VMINMAXNMV_fty<string iname, bit notAbs, bit isMin,
  defm f32 : MVE_VMINMAXNMV_p<iname, notAbs, isMin, MVE_v4f32, intrBase,
  defm f16 : MVE_VMINMAXNMV_p<iname, notAbs, isMin, MVE_v8f16, intrBase,
// FP min/max across vector; the *A* forms compare absolute values
// (notAbs = 0).
defm MVE_VMINNMV : MVE_VMINMAXNMV_fty<"vminnmv", 1, 1, "int_arm_mve_minnmv">;
defm MVE_VMAXNMV : MVE_VMINMAXNMV_fty<"vmaxnmv", 1, 0, "int_arm_mve_maxnmv">;
defm MVE_VMINNMAV: MVE_VMINMAXNMV_fty<"vminnmav", 0, 1, "int_arm_mve_minnmav">;
defm MVE_VMAXNMAV: MVE_VMINMAXNMV_fty<"vmaxnmav", 0, 0, "int_arm_mve_maxnmav">;
// Base record for the integer min/max-across-vector instructions
// (VMINV/VMAXV/VMINAV/VMAXAV). Same tied-accumulator shape as
// MVE_VMINMAXNMV above, but parameterized on signedness (U) and the
// two-bit element size field instead of an FP size bit.
897 class MVE_VMINMAXV<string iname, string suffix, bit U, bits<2> size,
898 bit bit_17, bit bit_7, list<dag> pattern=[]>
899 : MVE_rDest<(outs rGPR:$RdaDest), (ins rGPR:$RdaSrc, MQPR:$Qm), NoItinerary,
900 iname, suffix, "$RdaSrc, $Qm", "$RdaDest = $RdaSrc", pattern> {
905 let Inst{22-20} = 0b110;
906 let Inst{19-18} = size{1-0};
907 let Inst{17} = bit_17;
909 let Inst{15-12} = RdaDest{3-0};
912 let Inst{6-5} = 0b00;
913 let Inst{3-1} = Qm{2-0};
915 let horizontalReduction = 1;
// One MVE_VMINMAXV instruction plus intrinsic-selection patterns. The
// intrinsic's argument list is assembled with !con: the absolute variants
// (notAbs = 0) omit the trailing unsignedness flag that the plain min/max
// intrinsics carry, so `args` differs between the two cases.
918 multiclass MVE_VMINMAXV_p<string iname, bit notAbs, bit isMin,
919 MVEVectorVTInfo VTI, string intrBaseName> {
920 def "": MVE_VMINMAXV<iname, VTI.Suffix, VTI.Unsigned, VTI.Size,
922 defvar Inst = !cast<Instruction>(NAME);
923 defvar unpred_intr = !cast<Intrinsic>(intrBaseName);
924 defvar pred_intr = !cast<Intrinsic>(intrBaseName#"_predicated");
925 defvar base_args = (? (i32 rGPR:$prev), (VTI.Vec MQPR:$vec));
926 defvar args = !if(notAbs, !con(base_args, (? (i32 VTI.Unsigned))),
929 let Predicates = [HasMVEInt] in {
930 def : Pat<(i32 !con(args, (unpred_intr))),
931 (i32 (Inst (i32 rGPR:$prev), (VTI.Vec MQPR:$vec)))>;
932 def : Pat<(i32 !con(args, (pred_intr (VTI.Pred VCCR:$pred)))),
933 (i32 (Inst (i32 rGPR:$prev), (VTI.Vec MQPR:$vec),
934 ARMVCCThen, (VTI.Pred VCCR:$pred)))>;
// Expands the plain (notAbs = 1) min/max reduction across all six integer
// element type/signedness combinations.
938 multiclass MVE_VMINMAXV_ty<string iname, bit isMin, string intrBaseName> {
939 defm s8 : MVE_VMINMAXV_p<iname, 1, isMin, MVE_v16s8, intrBaseName>;
940 defm s16: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v8s16, intrBaseName>;
941 defm s32: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v4s32, intrBaseName>;
942 defm u8 : MVE_VMINMAXV_p<iname, 1, isMin, MVE_v16u8, intrBaseName>;
943 defm u16: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v8u16, intrBaseName>;
944 defm u32: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v4u32, intrBaseName>;
// SelectionDAG node type for a reduction taking a scalar accumulator and a
// vector, producing a scalar: result int, operand 0 int, operand 1 vector.
947 def SDTVecReduceR : SDTypeProfile<1, 2, [ // Reduction of an integer and vector into an integer
948 SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>
// Target nodes produced by ARM ISelLowering for min/max-across-vector.
950 def ARMVMINVu : SDNode<"ARMISD::VMINVu", SDTVecReduceR>;
951 def ARMVMINVs : SDNode<"ARMISD::VMINVs", SDTVecReduceR>;
952 def ARMVMAXVu : SDNode<"ARMISD::VMAXVu", SDTVecReduceR>;
953 def ARMVMAXVs : SDNode<"ARMISD::VMAXVs", SDTVecReduceR>;
955 defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 1, "int_arm_mve_minv">;
956 defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0, "int_arm_mve_maxv">;
// Selection patterns for generic vecreduce_{s,u}{min,max} nodes and for the
// ARMVMINV/ARMVMAXV target nodes. For the generic nodes (which carry no
// accumulator) the tied GPR input is seeded with the reduction's identity
// element for that type — e.g. MVN #127 materializes -128 (smallest i8) for
// signed-max of v16i8, and 0 for all the unsigned-max cases. The ARMVM*V
// node patterns pass the accumulator $x straight through.
958 let Predicates = [HasMVEInt] in {
959 def : Pat<(i32 (vecreduce_smax (v16i8 MQPR:$src))),
960 (i32 (MVE_VMAXVs8 (t2MVNi (i32 127)), $src))>;
961 def : Pat<(i32 (vecreduce_smax (v8i16 MQPR:$src))),
962 (i32 (MVE_VMAXVs16 (t2MOVi32imm (i32 -32768)), $src))>;
963 def : Pat<(i32 (vecreduce_smax (v4i32 MQPR:$src))),
964 (i32 (MVE_VMAXVs32 (t2MOVi (i32 -2147483648)), $src))>;
965 def : Pat<(i32 (vecreduce_umax (v16i8 MQPR:$src))),
966 (i32 (MVE_VMAXVu8 (t2MOVi (i32 0)), $src))>;
967 def : Pat<(i32 (vecreduce_umax (v8i16 MQPR:$src))),
968 (i32 (MVE_VMAXVu16 (t2MOVi (i32 0)), $src))>;
969 def : Pat<(i32 (vecreduce_umax (v4i32 MQPR:$src))),
970 (i32 (MVE_VMAXVu32 (t2MOVi (i32 0)), $src))>;
972 def : Pat<(i32 (vecreduce_smin (v16i8 MQPR:$src))),
973 (i32 (MVE_VMINVs8 (t2MOVi (i32 127)), $src))>;
974 def : Pat<(i32 (vecreduce_smin (v8i16 MQPR:$src))),
975 (i32 (MVE_VMINVs16 (t2MOVi16 (i32 32767)), $src))>;
976 def : Pat<(i32 (vecreduce_smin (v4i32 MQPR:$src))),
977 (i32 (MVE_VMINVs32 (t2MVNi (i32 -2147483648)), $src))>;
978 def : Pat<(i32 (vecreduce_umin (v16i8 MQPR:$src))),
979 (i32 (MVE_VMINVu8 (t2MOVi (i32 255)), $src))>;
980 def : Pat<(i32 (vecreduce_umin (v8i16 MQPR:$src))),
981 (i32 (MVE_VMINVu16 (t2MOVi16 (i32 65535)), $src))>;
982 def : Pat<(i32 (vecreduce_umin (v4i32 MQPR:$src))),
983 (i32 (MVE_VMINVu32 (t2MOVi (i32 4294967295)), $src))>;
985 def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v16i8 MQPR:$src))),
986 (i32 (MVE_VMINVu8 $x, $src))>;
987 def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v8i16 MQPR:$src))),
988 (i32 (MVE_VMINVu16 $x, $src))>;
989 def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v4i32 MQPR:$src))),
990 (i32 (MVE_VMINVu32 $x, $src))>;
991 def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v16i8 MQPR:$src))),
992 (i32 (MVE_VMINVs8 $x, $src))>;
993 def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v8i16 MQPR:$src))),
994 (i32 (MVE_VMINVs16 $x, $src))>;
995 def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v4i32 MQPR:$src))),
996 (i32 (MVE_VMINVs32 $x, $src))>;
998 def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v16i8 MQPR:$src))),
999 (i32 (MVE_VMAXVu8 $x, $src))>;
1000 def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v8i16 MQPR:$src))),
1001 (i32 (MVE_VMAXVu16 $x, $src))>;
1002 def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v4i32 MQPR:$src))),
1003 (i32 (MVE_VMAXVu32 $x, $src))>;
1004 def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v16i8 MQPR:$src))),
1005 (i32 (MVE_VMAXVs8 $x, $src))>;
1006 def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v8i16 MQPR:$src))),
1007 (i32 (MVE_VMAXVs16 $x, $src))>;
1008 def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v4i32 MQPR:$src))),
1009 (i32 (MVE_VMAXVs32 $x, $src))>;
// Absolute-value variants (VMINAV/VMAXAV): reuse MVE_VMINMAXV_p with
// notAbs = 0. Only signed element types exist, since |x| of an unsigned
// value is the value itself.
1013 multiclass MVE_VMINMAXAV_ty<string iname, bit isMin, string intrBaseName> {
1014 defm s8 : MVE_VMINMAXV_p<iname, 0, isMin, MVE_v16s8, intrBaseName>;
1015 defm s16: MVE_VMINMAXV_p<iname, 0, isMin, MVE_v8s16, intrBaseName>;
1016 defm s32: MVE_VMINMAXV_p<iname, 0, isMin, MVE_v4s32, intrBaseName>;
1019 defm MVE_VMINAV : MVE_VMINMAXAV_ty<"vminav", 1, "int_arm_mve_minav">;
1020 defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0, "int_arm_mve_maxav">;
// Base record for the dot-product reductions VMLADAV/VMLSDAV (multiply then
// add/subtract across the vector, 32-bit result in an even GPR). The
// encoding bits parameterize: bit_0 = subtract vs add, X = exchanged
// operand pairing, A = accumulate into an incoming RdaSrc, bit_28/bit_8/sz
// distinguish signedness and element size at the instantiation sites below.
1022 class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
1023 bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0>
1024 : MVE_rDest<(outs tGPREven:$RdaDest), iops, NoItinerary, iname, suffix,
1025 "$RdaDest, $Qn, $Qm", cstr, []> {
1030 let Inst{28} = bit_28;
1031 let Inst{22-20} = 0b111;
1032 let Inst{19-17} = Qn{2-0};
1034 let Inst{15-13} = RdaDest{3-1};
1036 let Inst{8} = bit_8;
1037 let Inst{7-6} = 0b00;
1039 let Inst{3-1} = Qm{2-0};
1040 let Inst{0} = bit_0;
1041 let horizontalReduction = 1;
1042 // Allow tail predication for non-exchanging versions. As this is also a
1043 // horizontalReduction, ARMLowOverheadLoops will also have to check that
1044 // the vector operands contain zeros in their false lanes for the instruction
1045 // to be properly valid.
1046 let validForTailPredication = !eq(X, 0);
// Defines the non-accumulating and accumulating ("a"-suffixed) forms of one
// VMLADAV/VMLSDAV variant, plus four selection patterns for the
// int_arm_mve_vmldava intrinsic: {zero accumulator, live accumulator} x
// {unpredicated, predicated}. A zero accumulator argument selects the plain
// form; a register accumulator selects the "a" form with RdaSrc wired in.
1049 multiclass MVE_VMLAMLSDAV_A<string iname, string x, MVEVectorVTInfo VTI,
1050 bit sz, bit bit_28, bit X, bit bit_8, bit bit_0> {
1051 def ""#x#VTI.Suffix : MVE_VMLAMLSDAV<iname # x, VTI.Suffix,
1052 (ins MQPR:$Qn, MQPR:$Qm), "",
1053 sz, bit_28, 0b0, X, bit_8, bit_0>;
1054 def "a"#x#VTI.Suffix : MVE_VMLAMLSDAV<iname # "a" # x, VTI.Suffix,
1055 (ins tGPREven:$RdaSrc, MQPR:$Qn, MQPR:$Qm),
1056 "$RdaDest = $RdaSrc",
1057 sz, bit_28, 0b1, X, bit_8, bit_0>;
1058 let Predicates = [HasMVEInt] in {
1059 def : Pat<(i32 (int_arm_mve_vmldava
1061 (i32 bit_0) /* subtract */,
1062 (i32 X) /* exchange */,
1063 (i32 0) /* accumulator */,
1064 (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
1065 (i32 (!cast<Instruction>(NAME # x # VTI.Suffix)
1066 (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;
1068 def : Pat<(i32 (int_arm_mve_vmldava_predicated
1070 (i32 bit_0) /* subtract */,
1071 (i32 X) /* exchange */,
1072 (i32 0) /* accumulator */,
1073 (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
1074 (VTI.Pred VCCR:$mask))),
1075 (i32 (!cast<Instruction>(NAME # x # VTI.Suffix)
1076 (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
1077 ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
1079 def : Pat<(i32 (int_arm_mve_vmldava
1081 (i32 bit_0) /* subtract */,
1082 (i32 X) /* exchange */,
1083 (i32 tGPREven:$RdaSrc),
1084 (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
1085 (i32 (!cast<Instruction>(NAME # "a" # x # VTI.Suffix)
1086 (i32 tGPREven:$RdaSrc),
1087 (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;
1089 def : Pat<(i32 (int_arm_mve_vmldava_predicated
1091 (i32 bit_0) /* subtract */,
1092 (i32 X) /* exchange */,
1093 (i32 tGPREven:$RdaSrc),
1094 (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
1095 (VTI.Pred VCCR:$mask))),
1096 (i32 (!cast<Instruction>(NAME # "a" # x # VTI.Suffix)
1097 (i32 tGPREven:$RdaSrc),
1098 (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
1099 ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
// _AX adds the exchanged ("x") variant on top of _A. VMLADAV gets both the
// signed AX set and the unsigned non-exchanged set; VMLSDAV is signed-only.
// The defm instantiations below cover 8/16/32-bit element sizes.
1103 multiclass MVE_VMLAMLSDAV_AX<string iname, MVEVectorVTInfo VTI, bit sz,
1104 bit bit_28, bit bit_8, bit bit_0> {
1105 defm "" : MVE_VMLAMLSDAV_A<iname, "", VTI, sz, bit_28,
1107 defm "" : MVE_VMLAMLSDAV_A<iname, "x", VTI, sz, bit_28,
1111 multiclass MVE_VMLADAV_multi<MVEVectorVTInfo SVTI, MVEVectorVTInfo UVTI,
1112 bit sz, bit bit_8> {
1113 defm "" : MVE_VMLAMLSDAV_AX<"vmladav", SVTI,
1114 sz, 0b0, bit_8, 0b0>;
1115 defm "" : MVE_VMLAMLSDAV_A<"vmladav", "", UVTI,
1116 sz, 0b1, 0b0, bit_8, 0b0>;
1119 multiclass MVE_VMLSDAV_multi<MVEVectorVTInfo VTI, bit sz, bit bit_28> {
1120 defm "" : MVE_VMLAMLSDAV_AX<"vmlsdav", VTI,
1121 sz, bit_28, 0b0, 0b1>;
1124 defm MVE_VMLADAV : MVE_VMLADAV_multi<MVE_v16s8, MVE_v16u8, 0b0, 0b1>;
1125 defm MVE_VMLADAV : MVE_VMLADAV_multi<MVE_v8s16, MVE_v8u16, 0b0, 0b0>;
1126 defm MVE_VMLADAV : MVE_VMLADAV_multi<MVE_v4s32, MVE_v4u32, 0b1, 0b0>;
1128 defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v16s8, 0b0, 0b1>;
1129 defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v8s16, 0b0, 0b0>;
1130 defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v4s32, 0b1, 0b0>;
// SelectionDAG node types for the multiply-accumulate reductions:
//   SDTVecReduce2    — two vectors in, one scalar out (VMLAV)
//   SDTVecReduce2L   — long form producing a lo/hi scalar pair (VMLALV)
//   SDTVecReduce2LA  — long form also consuming a lo/hi pair (VMLALVA)
// The ...P profiles are the same shapes with a trailing predicate vector.
1132 def SDTVecReduce2 : SDTypeProfile<1, 2, [ // VMLAV
1133 SDTCisInt<0>, SDTCisVec<1>, SDTCisVec<2>
1135 def SDTVecReduce2L : SDTypeProfile<2, 2, [ // VMLALV
1136 SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>, SDTCisVec<3>
1138 def SDTVecReduce2LA : SDTypeProfile<2, 4, [ // VMLALVA
1139 SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>,
1140 SDTCisVec<4>, SDTCisVec<5>
1142 def SDTVecReduce2P : SDTypeProfile<1, 3, [ // VMLAV
1143 SDTCisInt<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>
1145 def SDTVecReduce2LP : SDTypeProfile<2, 3, [ // VMLALV
1146 SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCisVec<4>
1148 def SDTVecReduce2LAP : SDTypeProfile<2, 5, [ // VMLALVA
1149 SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>,
1150 SDTCisVec<4>, SDTCisVec<5>, SDTCisVec<6>
// Signed/unsigned and predicated ("p") flavors of each node.
1152 def ARMVMLAVs : SDNode<"ARMISD::VMLAVs", SDTVecReduce2>;
1153 def ARMVMLAVu : SDNode<"ARMISD::VMLAVu", SDTVecReduce2>;
1154 def ARMVMLALVs : SDNode<"ARMISD::VMLALVs", SDTVecReduce2L>;
1155 def ARMVMLALVu : SDNode<"ARMISD::VMLALVu", SDTVecReduce2L>;
1156 def ARMVMLALVAs : SDNode<"ARMISD::VMLALVAs", SDTVecReduce2LA>;
1157 def ARMVMLALVAu : SDNode<"ARMISD::VMLALVAu", SDTVecReduce2LA>;
1158 def ARMVMLAVps : SDNode<"ARMISD::VMLAVps", SDTVecReduce2P>;
1159 def ARMVMLAVpu : SDNode<"ARMISD::VMLAVpu", SDTVecReduce2P>;
1160 def ARMVMLALVps : SDNode<"ARMISD::VMLALVps", SDTVecReduce2LP>;
1161 def ARMVMLALVpu : SDNode<"ARMISD::VMLALVpu", SDTVecReduce2LP>;
1162 def ARMVMLALVAps : SDNode<"ARMISD::VMLALVAps", SDTVecReduce2LAP>;
1163 def ARMVMLALVApu : SDNode<"ARMISD::VMLALVApu", SDTVecReduce2LAP>;
// Selection patterns for VMLADAV. Four families, in order:
//  1. plain vecreduce_add(mul) / ARMVMLAV  -> MVE_VMLADAV{u,s}N
//  2. add(reduction, GPR)                  -> accumulating MVE_VMLADAVa*
//  3. predicated reductions, expressed either as vecreduce_add over a
//     vselect-with-zeros or as the ARMVMLAVp* nodes -> predicated VMLADAV
//  4. add(predicated reduction, GPR)       -> predicated accumulating forms
1165 let Predicates = [HasMVEInt] in {
1166 def : Pat<(i32 (vecreduce_add (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)))),
1167 (i32 (MVE_VMLADAVu32 $src1, $src2))>;
1168 def : Pat<(i32 (vecreduce_add (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)))),
1169 (i32 (MVE_VMLADAVu16 $src1, $src2))>;
1170 def : Pat<(i32 (ARMVMLAVs (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
1171 (i32 (MVE_VMLADAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
1172 def : Pat<(i32 (ARMVMLAVu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
1173 (i32 (MVE_VMLADAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
1174 def : Pat<(i32 (vecreduce_add (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)))),
1175 (i32 (MVE_VMLADAVu8 $src1, $src2))>;
1176 def : Pat<(i32 (ARMVMLAVs (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
1177 (i32 (MVE_VMLADAVs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
1178 def : Pat<(i32 (ARMVMLAVu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
1179 (i32 (MVE_VMLADAVu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
1181 def : Pat<(i32 (add (i32 (vecreduce_add (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)))),
1182 (i32 tGPREven:$src3))),
1183 (i32 (MVE_VMLADAVau32 $src3, $src1, $src2))>;
1184 def : Pat<(i32 (add (i32 (vecreduce_add (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)))),
1185 (i32 tGPREven:$src3))),
1186 (i32 (MVE_VMLADAVau16 $src3, $src1, $src2))>;
1187 def : Pat<(i32 (add (ARMVMLAVs (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)), tGPREven:$Rd)),
1188 (i32 (MVE_VMLADAVas16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
1189 def : Pat<(i32 (add (ARMVMLAVu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)), tGPREven:$Rd)),
1190 (i32 (MVE_VMLADAVau16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
1191 def : Pat<(i32 (add (i32 (vecreduce_add (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)))),
1192 (i32 tGPREven:$src3))),
1193 (i32 (MVE_VMLADAVau8 $src3, $src1, $src2))>;
1194 def : Pat<(i32 (add (ARMVMLAVs (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)), tGPREven:$Rd)),
1195 (i32 (MVE_VMLADAVas8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
1196 def : Pat<(i32 (add (ARMVMLAVu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)), tGPREven:$Rd)),
1197 (i32 (MVE_VMLADAVau8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
1200 def : Pat<(i32 (vecreduce_add (vselect (v4i1 VCCR:$pred),
1201 (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)),
1202 (v4i32 ARMimmAllZerosV)))),
1203 (i32 (MVE_VMLADAVu32 $src1, $src2, ARMVCCThen, $pred))>;
1204 def : Pat<(i32 (vecreduce_add (vselect (v8i1 VCCR:$pred),
1205 (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)),
1206 (v8i16 ARMimmAllZerosV)))),
1207 (i32 (MVE_VMLADAVu16 $src1, $src2, ARMVCCThen, $pred))>;
1208 def : Pat<(i32 (ARMVMLAVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred))),
1209 (i32 (MVE_VMLADAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
1210 def : Pat<(i32 (ARMVMLAVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred))),
1211 (i32 (MVE_VMLADAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
1212 def : Pat<(i32 (vecreduce_add (vselect (v16i1 VCCR:$pred),
1213 (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)),
1214 (v16i8 ARMimmAllZerosV)))),
1215 (i32 (MVE_VMLADAVu8 $src1, $src2, ARMVCCThen, $pred))>;
1216 def : Pat<(i32 (ARMVMLAVps (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred))),
1217 (i32 (MVE_VMLADAVs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
1218 def : Pat<(i32 (ARMVMLAVpu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred))),
1219 (i32 (MVE_VMLADAVu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
1221 def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v4i1 VCCR:$pred),
1222 (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)),
1223 (v4i32 ARMimmAllZerosV)))),
1224 (i32 tGPREven:$src3))),
1225 (i32 (MVE_VMLADAVau32 $src3, $src1, $src2, ARMVCCThen, $pred))>;
1226 def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v8i1 VCCR:$pred),
1227 (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)),
1228 (v8i16 ARMimmAllZerosV)))),
1229 (i32 tGPREven:$src3))),
1230 (i32 (MVE_VMLADAVau16 $src3, $src1, $src2, ARMVCCThen, $pred))>;
1231 def : Pat<(i32 (add (ARMVMLAVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), tGPREven:$Rd)),
1232 (i32 (MVE_VMLADAVas16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
1233 def : Pat<(i32 (add (ARMVMLAVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), tGPREven:$Rd)),
1234 (i32 (MVE_VMLADAVau16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
1235 def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v16i1 VCCR:$pred),
1236 (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)),
1237 (v16i8 ARMimmAllZerosV)))),
1238 (i32 tGPREven:$src3))),
1239 (i32 (MVE_VMLADAVau8 $src3, $src1, $src2, ARMVCCThen, $pred))>;
1240 def : Pat<(i32 (add (ARMVMLAVps (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred)), tGPREven:$Rd)),
1241 (i32 (MVE_VMLADAVas8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
1242 def : Pat<(i32 (add (ARMVMLAVpu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred)), tGPREven:$Rd)),
1243 (i32 (MVE_VMLADAVau8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
1246 // vmlav aliases vmladav
// Assembler-only aliases: "vmlav[a]" spellings map onto the corresponding
// MVE_VMLADAV[a] instruction for every integer suffix.
1247 foreach acc = ["", "a"] in {
1248 foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32"] in {
1249 def : MVEInstAlias<"vmlav"#acc#"${vp}."#suffix#"\t$RdaDest, $Qn, $Qm",
1250 (!cast<Instruction>("MVE_VMLADAV"#acc#suffix)
1251 tGPREven:$RdaDest, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
1255 // Base class for VMLALDAV and VMLSLDAV, VRMLALDAVH, VRMLSLDAVH
// Long-result variant of MVE_VMLAMLSDAV: the 64-bit accumulator is split
// across an even/odd GPR pair (RdaLoDest/RdaHiDest), with the high half
// encoded in Inst{22-20} and the low half in Inst{15-13}.
1256 class MVE_VMLALDAVBase<string iname, string suffix, dag iops, string cstr,
1257 bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0,
1258 list<dag> pattern=[]>
1259 : MVE_rDest<(outs tGPREven:$RdaLoDest, tGPROdd:$RdaHiDest), iops, NoItinerary,
1260 iname, suffix, "$RdaLoDest, $RdaHiDest, $Qn, $Qm", cstr, pattern> {
1266 let Inst{28} = bit_28;
1267 let Inst{22-20} = RdaHiDest{3-1};
1268 let Inst{19-17} = Qn{2-0};
1270 let Inst{15-13} = RdaLoDest{3-1};
1272 let Inst{8} = bit_8;
1273 let Inst{7-6} = 0b00;
1275 let Inst{3-1} = Qm{2-0};
1276 let Inst{0} = bit_0;
1277 let horizontalReduction = 1;
1278 // Allow tail predication for non-exchanging versions. As this is also a
1279 // horizontalReduction, ARMLowOverheadLoops will also have to check that
1280 // the vector operands contain zeros in their false lanes for the instruction
1281 // to be properly valid.
1282 let validForTailPredication = !eq(X, 0);
1284 let hasSideEffects = 0;
// _A: non-accumulating plus accumulating ("a") forms, the latter tying both
// halves of the 64-bit running value. _AX: additionally the exchanged ("x")
// variant of each.
1287 multiclass MVE_VMLALDAVBase_A<string iname, string x, string suffix,
1288 bit sz, bit bit_28, bit X, bit bit_8, bit bit_0,
1289 list<dag> pattern=[]> {
1290 def ""#x#suffix : MVE_VMLALDAVBase<
1291 iname # x, suffix, (ins MQPR:$Qn, MQPR:$Qm), "",
1292 sz, bit_28, 0b0, X, bit_8, bit_0, pattern>;
1293 def "a"#x#suffix : MVE_VMLALDAVBase<
1294 iname # "a" # x, suffix,
1295 (ins tGPREven:$RdaLoSrc, tGPROdd:$RdaHiSrc, MQPR:$Qn, MQPR:$Qm),
1296 "$RdaLoDest = $RdaLoSrc,$RdaHiDest = $RdaHiSrc",
1297 sz, bit_28, 0b1, X, bit_8, bit_0, pattern>;
1301 multiclass MVE_VMLALDAVBase_AX<string iname, string suffix, bit sz, bit bit_28,
1302 bit bit_8, bit bit_0, list<dag> pattern=[]> {
1303 defm "" : MVE_VMLALDAVBase_A<iname, "", suffix, sz,
1304 bit_28, 0b0, bit_8, bit_0, pattern>;
1305 defm "" : MVE_VMLALDAVBase_A<iname, "x", suffix, sz,
1306 bit_28, 0b1, bit_8, bit_0, pattern>;
// VRMLALDAVH (rounding, high-half long accumulate): signed forms get the
// exchanged variants too, unsigned only the plain form. Followed by
// assembler-only "vrmlalvh[a]" aliases onto the VRMLALDAVH instructions.
1309 multiclass MVE_VRMLALDAVH_multi<string suffix, list<dag> pattern=[]> {
1310 defm "" : MVE_VMLALDAVBase_AX<"vrmlaldavh", "s"#suffix,
1311 0b0, 0b0, 0b1, 0b0, pattern>;
1312 defm "" : MVE_VMLALDAVBase_A<"vrmlaldavh", "", "u"#suffix,
1313 0b0, 0b1, 0b0, 0b1, 0b0, pattern>;
1316 defm MVE_VRMLALDAVH : MVE_VRMLALDAVH_multi<"32">;
1318 // vrmlalvh aliases for vrmlaldavh
1319 def : MVEInstAlias<"vrmlalvh${vp}.s32\t$RdaLo, $RdaHi, $Qn, $Qm",
1321 tGPREven:$RdaLo, tGPROdd:$RdaHi,
1322 MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
1323 def : MVEInstAlias<"vrmlalvha${vp}.s32\t$RdaLo, $RdaHi, $Qn, $Qm",
1325 tGPREven:$RdaLo, tGPROdd:$RdaHi,
1326 MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
1327 def : MVEInstAlias<"vrmlalvh${vp}.u32\t$RdaLo, $RdaHi, $Qn, $Qm",
1329 tGPREven:$RdaLo, tGPROdd:$RdaHi,
1330 MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
1331 def : MVEInstAlias<"vrmlalvha${vp}.u32\t$RdaLo, $RdaHi, $Qn, $Qm",
1333 tGPREven:$RdaLo, tGPROdd:$RdaHi,
1334 MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
// VMLALDAV: signed forms with exchange variants, unsigned plain form only;
// instantiated for 16- and 32-bit elements.
1336 multiclass MVE_VMLALDAV_multi<string suffix, bit sz, list<dag> pattern=[]> {
1337 defm "" : MVE_VMLALDAVBase_AX<"vmlaldav", "s"#suffix, sz, 0b0, 0b0, 0b0, pattern>;
1338 defm "" : MVE_VMLALDAVBase_A<"vmlaldav", "", "u"#suffix,
1339 sz, 0b1, 0b0, 0b0, 0b0, pattern>;
1342 defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"16", 0b0>;
1343 defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"32", 0b1>;
// Selection patterns mapping the ARMVMLALV* target nodes (long
// multiply-accumulate reductions, with optional accumulator "A" and
// predicate "p" operands) onto the VMLALDAV instruction forms defined above.
1345 let Predicates = [HasMVEInt] in {
1346 def : Pat<(ARMVMLALVs (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)),
1347 (MVE_VMLALDAVs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))>;
1348 def : Pat<(ARMVMLALVu (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)),
1349 (MVE_VMLALDAVu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))>;
1350 def : Pat<(ARMVMLALVs (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)),
1351 (MVE_VMLALDAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))>;
1352 def : Pat<(ARMVMLALVu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)),
1353 (MVE_VMLALDAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))>;
1355 def : Pat<(ARMVMLALVAs tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)),
1356 (MVE_VMLALDAVas32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))>;
1357 def : Pat<(ARMVMLALVAu tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)),
1358 (MVE_VMLALDAVau32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))>;
1359 def : Pat<(ARMVMLALVAs tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)),
1360 (MVE_VMLALDAVas16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))>;
1361 def : Pat<(ARMVMLALVAu tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)),
1362 (MVE_VMLALDAVau16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))>;
1365 def : Pat<(ARMVMLALVps (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
1366 (MVE_VMLALDAVs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
1367 def : Pat<(ARMVMLALVpu (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
1368 (MVE_VMLALDAVu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
1369 def : Pat<(ARMVMLALVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
1370 (MVE_VMLALDAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
1371 def : Pat<(ARMVMLALVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
1372 (MVE_VMLALDAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
1374 def : Pat<(ARMVMLALVAps tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
1375 (MVE_VMLALDAVas32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
1376 def : Pat<(ARMVMLALVApu tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
1377 (MVE_VMLALDAVau32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
1378 def : Pat<(ARMVMLALVAps tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
1379 (MVE_VMLALDAVas16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
1380 def : Pat<(ARMVMLALVApu tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
1381 (MVE_VMLALDAVau16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
1384 // vmlalv aliases vmlaldav
// Assembler-only aliases: "vmlalv[a]" spellings map onto the corresponding
// MVE_VMLALDAV[a] instruction for each long-capable suffix.
1385 foreach acc = ["", "a"] in {
1386 foreach suffix = ["s16", "s32", "u16", "u32"] in {
1387 def : MVEInstAlias<"vmlalv" # acc # "${vp}." # suffix #
1388 "\t$RdaLoDest, $RdaHiDest, $Qn, $Qm",
1389 (!cast<Instruction>("MVE_VMLALDAV"#acc#suffix)
1390 tGPREven:$RdaLoDest, tGPROdd:$RdaHiDest,
1391 MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
// Long multiply-subtract reductions (bit_0 = 1 in the base class): VMLSLDAV
// for s16/s32 and the rounding-high variant VRMLSLDAVH for s32.
1395 multiclass MVE_VMLSLDAV_multi<string iname, string suffix, bit sz,
1396 bit bit_28, list<dag> pattern=[]> {
1397 defm "" : MVE_VMLALDAVBase_AX<iname, suffix, sz, bit_28, 0b0, 0b1, pattern>;
1400 defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s16", 0b0, 0b0>;
1401 defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s32", 0b1, 0b0>;
1402 defm MVE_VRMLSLDAVH : MVE_VMLSLDAV_multi<"vrmlsldavh", "s32", 0b0, 0b1>;
1404 // end of mve_rDest instructions
1406 // start of mve_comp instructions
// Base record for two-vector-operand, vector-result "comp" instructions
// (min/max etc.): Qd = op(Qn, Qm), with the standard Q-register bit
// splitting into the encoding fields.
1408 class MVE_comp<InstrItinClass itin, string iname, string suffix,
1409 string cstr, list<dag> pattern=[]>
1410 : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), itin, iname, suffix,
1411 "$Qd, $Qn, $Qm", vpred_r, cstr, pattern> {
1416 let Inst{22} = Qd{3};
1417 let Inst{19-17} = Qn{2-0};
1419 let Inst{15-13} = Qd{2-0};
1421 let Inst{10-9} = 0b11;
1422 let Inst{7} = Qn{3};
1423 let Inst{5} = Qm{3};
1424 let Inst{3-1} = Qm{2-0};
// Vector floating-point VMINNM/VMAXNM: bit_21 selects min vs max (see the
// bit_4 argument at the defm sites). The multiclass wires the generic
// fminnum/fmaxnum node and the predicated intrinsic through MVE_TwoOpPattern.
1428 class MVE_VMINMAXNM<string iname, string suffix, bit sz, bit bit_21,
1429 list<dag> pattern=[]>
1430 : MVE_comp<NoItinerary, iname, suffix, "", pattern> {
1433 let Inst{25-24} = 0b11;
1435 let Inst{21} = bit_21;
1442 let Predicates = [HasMVEFloat];
1445 multiclass MVE_VMINMAXNM_m<string iname, bit bit_4, MVEVectorVTInfo VTI, SDNode Op, Intrinsic PredInt> {
1446 def "" : MVE_VMINMAXNM<iname, VTI.Suffix, VTI.Size{0}, bit_4>;
1448 let Predicates = [HasMVEFloat] in {
1449 defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 0)), !cast<Instruction>(NAME)>;
1453 defm MVE_VMAXNMf32 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v4f32, fmaxnum, int_arm_mve_max_predicated>;
1454 defm MVE_VMAXNMf16 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v8f16, fmaxnum, int_arm_mve_max_predicated>;
1455 defm MVE_VMINNMf32 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v4f32, fminnum, int_arm_mve_min_predicated>;
1456 defm MVE_VMINNMf16 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v8f16, fminnum, int_arm_mve_min_predicated>;
// Vector integer VMIN/VMAX: bit_4 selects min vs max, U and size come from
// the type info. The multiclass ties the generic min/max node and the
// predicated intrinsic to the instruction via MVE_TwoOpPattern, passing the
// unsignedness flag as the intrinsic's extra argument.
1459 class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size,
1460 bit bit_4, list<dag> pattern=[]>
1461 : MVE_comp<NoItinerary, iname, suffix, "", pattern> {
1464 let Inst{25-24} = 0b11;
1466 let Inst{21-20} = size{1-0};
1470 let Inst{4} = bit_4;
1471 let validForTailPredication = 1;
1474 multiclass MVE_VMINMAX_m<string iname, bit bit_4, MVEVectorVTInfo VTI,
1475 SDNode Op, Intrinsic PredInt> {
1476 def "" : MVE_VMINMAX<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, bit_4>;
1478 let Predicates = [HasMVEInt] in {
1479 defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)), !cast<Instruction>(NAME)>;
// Convenience wrappers picking the signed or unsigned generic DAG node from
// the type info, then the twelve concrete instantiations.
1483 multiclass MVE_VMAX<MVEVectorVTInfo VTI>
1484 : MVE_VMINMAX_m<"vmax", 0b0, VTI, !if(VTI.Unsigned, umax, smax), int_arm_mve_max_predicated>;
1485 multiclass MVE_VMIN<MVEVectorVTInfo VTI>
1486 : MVE_VMINMAX_m<"vmin", 0b1, VTI, !if(VTI.Unsigned, umin, smin), int_arm_mve_min_predicated>;
1488 defm MVE_VMINs8 : MVE_VMIN<MVE_v16s8>;
1489 defm MVE_VMINs16 : MVE_VMIN<MVE_v8s16>;
1490 defm MVE_VMINs32 : MVE_VMIN<MVE_v4s32>;
1491 defm MVE_VMINu8 : MVE_VMIN<MVE_v16u8>;
1492 defm MVE_VMINu16 : MVE_VMIN<MVE_v8u16>;
1493 defm MVE_VMINu32 : MVE_VMIN<MVE_v4u32>;
1495 defm MVE_VMAXs8 : MVE_VMAX<MVE_v16s8>;
1496 defm MVE_VMAXs16 : MVE_VMAX<MVE_v8s16>;
1497 defm MVE_VMAXs32 : MVE_VMAX<MVE_v4s32>;
1498 defm MVE_VMAXu8 : MVE_VMAX<MVE_v16u8>;
1499 defm MVE_VMAXu16 : MVE_VMAX<MVE_v8u16>;
1500 defm MVE_VMAXu32 : MVE_VMAX<MVE_v4u32>;
1502 // end of mve_comp instructions
1504 // start of mve_bit instructions
// Common base for the bitwise/bit-manipulation instructions: destination Qd
// and source Qm encoding only; operand lists vary per subclass.
1506 class MVE_bit_arith<dag oops, dag iops, string iname, string suffix,
1507 string ops, string cstr, list<dag> pattern=[]>
1508 : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred_r, cstr, pattern> {
1512 let Inst{22} = Qd{3};
1513 let Inst{15-13} = Qd{2-0};
1514 let Inst{5} = Qm{3};
1515 let Inst{3-1} = Qm{2-0};
// VBIC (register form): Qd = Qn AND NOT Qm. Suffix-less; typed-suffix
// spellings are provided by the alias foreach further down.
1518 def MVE_VBIC : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
1519 "vbic", "", "$Qd, $Qn, $Qm", ""> {
1523 let Inst{25-23} = 0b110;
1524 let Inst{21-20} = 0b01;
1525 let Inst{19-17} = Qn{2-0};
1527 let Inst{12-8} = 0b00001;
1528 let Inst{7} = Qn{3};
1532 let validForTailPredication = 1;
// VREV: reverse elements within 16/32/64-bit containers. bit_8_7 encodes
// the container width; the 64-bit forms need @earlyclobber since Qd and Qm
// overlap illegally otherwise.
1535 class MVE_VREV<string iname, string suffix, bits<2> size, bits<2> bit_8_7, string cstr="">
1536 : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), iname,
1537 suffix, "$Qd, $Qm", cstr> {
1540 let Inst{25-23} = 0b111;
1541 let Inst{21-20} = 0b11;
1542 let Inst{19-18} = size;
1543 let Inst{17-16} = 0b00;
1544 let Inst{12-9} = 0b0000;
1545 let Inst{8-7} = bit_8_7;
1551 def MVE_VREV64_8 : MVE_VREV<"vrev64", "8", 0b00, 0b00, "@earlyclobber $Qd">;
1552 def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00, "@earlyclobber $Qd">;
1553 def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00, "@earlyclobber $Qd">;
1555 def MVE_VREV32_8 : MVE_VREV<"vrev32", "8", 0b00, 0b01>;
1556 def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01>;
1558 def MVE_VREV16_8 : MVE_VREV<"vrev16", "8", 0b00, 0b10>;
// bswap of 16/32-bit lanes is byte reversal within each lane, i.e. VREV16.8
// / VREV32.8 respectively.
1560 let Predicates = [HasMVEInt] in {
1561 def : Pat<(v8i16 (bswap (v8i16 MQPR:$src))),
1562 (v8i16 (MVE_VREV16_8 (v8i16 MQPR:$src)))>;
1563 def : Pat<(v4i32 (bswap (v4i32 MQPR:$src))),
1564 (v4i32 (MVE_VREV32_8 (v4i32 MQPR:$src)))>;
// For each vector type, select both the generic ARMvrevN node and the
// predicated vrev intrinsic onto the given VREV instruction.
1567 multiclass MVE_VREV_basic_patterns<int revbits, list<MVEVectorVTInfo> VTIs,
1569 defvar unpred_op = !cast<SDNode>("ARMvrev" # revbits);
1571 foreach VTI = VTIs in {
1572 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$src))),
1573 (VTI.Vec (Inst (VTI.Vec MQPR:$src)))>;
1574 def : Pat<(VTI.Vec (int_arm_mve_vrev_predicated (VTI.Vec MQPR:$src),
1575 revbits, (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive))),
1576 (VTI.Vec (Inst (VTI.Vec MQPR:$src), ARMVCCThen,
1577 (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
1581 let Predicates = [HasMVEInt] in {
1582 defm: MVE_VREV_basic_patterns<64, [MVE_v4i32, MVE_v4f32], MVE_VREV64_32>;
1583 defm: MVE_VREV_basic_patterns<64, [MVE_v8i16, MVE_v8f16], MVE_VREV64_16>;
1584 defm: MVE_VREV_basic_patterns<64, [MVE_v16i8 ], MVE_VREV64_8>;
1586 defm: MVE_VREV_basic_patterns<32, [MVE_v8i16, MVE_v8f16], MVE_VREV32_16>;
1587 defm: MVE_VREV_basic_patterns<32, [MVE_v16i8 ], MVE_VREV32_8>;
1589 defm: MVE_VREV_basic_patterns<16, [MVE_v16i8 ], MVE_VREV16_8>;
// VMVN (register form): bitwise NOT of Qm. Patterns cover the generic vnotq
// fragment and the predicated intrinsic for all four integer vector types.
1592 def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm),
1593 "vmvn", "", "$Qd, $Qm", ""> {
1595 let Inst{25-23} = 0b111;
1596 let Inst{21-16} = 0b110000;
1597 let Inst{12-6} = 0b0010111;
1600 let validForTailPredication = 1;
1603 let Predicates = [HasMVEInt] in {
1604 foreach VTI = [ MVE_v16i8, MVE_v8i16, MVE_v4i32, MVE_v2i64 ] in {
1605 def : Pat<(VTI.Vec (vnotq (VTI.Vec MQPR:$val1))),
1606 (VTI.Vec (MVE_VMVN (VTI.Vec MQPR:$val1)))>;
1607 def : Pat<(VTI.Vec (int_arm_mve_mvn_predicated (VTI.Vec MQPR:$val1),
1608 (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive))),
1609 (VTI.Vec (MVE_VMVN (VTI.Vec MQPR:$val1), ARMVCCThen,
1610 (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
// Two-source bitwise operations; bit_21_20 and bit_28 distinguish
// VEOR/VORN/VORR/VAND per the instantiations below.
1614 class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28>
1615 : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
1616 iname, "", "$Qd, $Qn, $Qm", ""> {
1619 let Inst{28} = bit_28;
1620 let Inst{25-23} = 0b110;
1621 let Inst{21-20} = bit_21_20;
1622 let Inst{19-17} = Qn{2-0};
1624 let Inst{12-8} = 0b00001;
1625 let Inst{7} = Qn{3};
1629 let validForTailPredication = 1;
1632 def MVE_VEOR : MVE_bit_ops<"veor", 0b00, 0b1>;
1633 def MVE_VORN : MVE_bit_ops<"vorn", 0b11, 0b0>;
1634 def MVE_VORR : MVE_bit_ops<"vorr", 0b10, 0b0>;
1635 def MVE_VAND : MVE_bit_ops<"vand", 0b00, 0b0>;
1637 // add ignored suffixes as aliases
// The bitwise ops are size-agnostic, so every typed suffix spelling is
// accepted and maps to the same suffix-less instruction.
1639 foreach s=["s8", "s16", "s32", "u8", "u16", "u32", "i8", "i16", "i32", "f16", "f32"] in {
1640 def : MVEInstAlias<"vbic${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
1641 (MVE_VBIC MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
1642 def : MVEInstAlias<"veor${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
1643 (MVE_VEOR MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
1644 def : MVEInstAlias<"vorn${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
1645 (MVE_VORN MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
1646 def : MVEInstAlias<"vorr${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
1647 (MVE_VORR MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
1648 def : MVEInstAlias<"vand${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
1649 (MVE_VAND MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
// MVE_TwoOpPattern instantiations tying each generic DAG operation (and,
// or, xor, and-not, or-not) plus its predicated intrinsic to the matching
// bitwise instruction. The trailing identity-vector argument (all-ones for
// AND, all-zeros for ORR/EOR) is supplied where the multiclass takes one.
1652 let Predicates = [HasMVEInt] in {
1653 defm : MVE_TwoOpPattern<MVE_v16i8, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>;
1654 defm : MVE_TwoOpPattern<MVE_v8i16, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>;
1655 defm : MVE_TwoOpPattern<MVE_v4i32, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>;
1656 defm : MVE_TwoOpPattern<MVE_v2i64, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>;
1658 defm : MVE_TwoOpPattern<MVE_v16i8, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>;
1659 defm : MVE_TwoOpPattern<MVE_v8i16, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>;
1660 defm : MVE_TwoOpPattern<MVE_v4i32, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>;
1661 defm : MVE_TwoOpPattern<MVE_v2i64, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>;
1663 defm : MVE_TwoOpPattern<MVE_v16i8, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>;
1664 defm : MVE_TwoOpPattern<MVE_v8i16, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>;
1665 defm : MVE_TwoOpPattern<MVE_v4i32, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>;
1666 defm : MVE_TwoOpPattern<MVE_v2i64, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>;
1668 defm : MVE_TwoOpPattern<MVE_v16i8, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>,
1669 int_arm_mve_bic_predicated, (? ), MVE_VBIC>;
1670 defm : MVE_TwoOpPattern<MVE_v8i16, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>,
1671 int_arm_mve_bic_predicated, (? ), MVE_VBIC>;
1672 defm : MVE_TwoOpPattern<MVE_v4i32, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>,
1673 int_arm_mve_bic_predicated, (? ), MVE_VBIC>;
1674 defm : MVE_TwoOpPattern<MVE_v2i64, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>,
1675 int_arm_mve_bic_predicated, (? ), MVE_VBIC>;
1677 defm : MVE_TwoOpPattern<MVE_v16i8, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>,
1678 int_arm_mve_orn_predicated, (? ), MVE_VORN>;
1679 defm : MVE_TwoOpPattern<MVE_v8i16, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>,
1680 int_arm_mve_orn_predicated, (? ), MVE_VORN>;
1681 defm : MVE_TwoOpPattern<MVE_v4i32, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>,
1682 int_arm_mve_orn_predicated, (? ), MVE_VORN>;
1683 defm : MVE_TwoOpPattern<MVE_v2i64, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>,
1684 int_arm_mve_orn_predicated, (? ), MVE_VORN>;
// Immediate forms of the bitwise ops (VORR/VBIC with a modified immediate).
// The immediate's bits are scattered across several Inst fields below;
// 'halfword' selects the 16-bit-element encoding variant via Inst{11}.
1687 class MVE_bit_cmode<string iname, string suffix, bit halfword, dag inOps>
1688 : MVE_p<(outs MQPR:$Qd), inOps, NoItinerary,
1689 iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> {
1693 let Inst{28} = imm{7};
1694 let Inst{27-23} = 0b11111;
1695 let Inst{22} = Qd{3};
1696 let Inst{21-19} = 0b000;
1697 let Inst{18-16} = imm{6-4};
1698 let Inst{15-13} = Qd{2-0};
1700 let Inst{11} = halfword;
1701 let Inst{10} = !if(halfword, 0, imm{10});
1702 let Inst{9} = imm{9};
1704 let Inst{7-6} = 0b01;
1706 let Inst{3-0} = imm{3-0};
// Instantiates one immediate-form instruction (Inst{5} distinguishes
// vorr/vbic) plus its unpredicated isel pattern and a vselect-based
// predicated pattern.
1709 multiclass MVE_bit_cmode_p<string iname, bit opcode,
1710 MVEVectorVTInfo VTI, Operand imm_type, SDNode op> {
1711 def "" : MVE_bit_cmode<iname, VTI.Suffix, VTI.Size{0},
1712 (ins MQPR:$Qd_src, imm_type:$imm)> {
1713 let Inst{5} = opcode;
1714 let validForTailPredication = 1;
1717 defvar Inst = !cast<Instruction>(NAME);
1718 defvar UnpredPat = (VTI.Vec (op (VTI.Vec MQPR:$src), timm:$simm));
1720 let Predicates = [HasMVEInt] in {
1721 def : Pat<UnpredPat,
1722 (VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm))>;
1723 def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
1724 UnpredPat, (VTI.Vec MQPR:$src))),
1725 (VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm,
1726 ARMVCCThen, (VTI.Pred VCCR:$pred)))>;
1730 multiclass MVE_VORRimm<MVEVectorVTInfo VTI, Operand imm_type> {
1731 defm "": MVE_bit_cmode_p<"vorr", 0, VTI, imm_type, ARMvorrImm>;
1733 multiclass MVE_VBICimm<MVEVectorVTInfo VTI, Operand imm_type> {
1734 defm "": MVE_bit_cmode_p<"vbic", 1, VTI, imm_type, ARMvbicImm>;
1737 defm MVE_VORRimmi16 : MVE_VORRimm<MVE_v8i16, nImmSplatI16>;
1738 defm MVE_VORRimmi32 : MVE_VORRimm<MVE_v4i32, nImmSplatI32>;
1739 defm MVE_VBICimmi16 : MVE_VBICimm<MVE_v8i16, nImmSplatI16>;
1740 defm MVE_VBICimmi32 : MVE_VBICimm<MVE_v4i32, nImmSplatI32>;
// VORN/VAND with an immediate are accepted as assembly aliases for
// VORR/VBIC with the bitwise-inverted immediate (nImmSplatNot*).
1742 def MVE_VORNimmi16 : MVEInstAlias<"vorn${vp}.i16\t$Qd, $imm",
1743 (MVE_VORRimmi16 MQPR:$Qd, nImmSplatNotI16:$imm, vpred_n:$vp), 0>;
1744 def MVE_VORNimmi32 : MVEInstAlias<"vorn${vp}.i32\t$Qd, $imm",
1745 (MVE_VORRimmi32 MQPR:$Qd, nImmSplatNotI32:$imm, vpred_n:$vp), 0>;
1747 def MVE_VANDimmi16 : MVEInstAlias<"vand${vp}.i16\t$Qd, $imm",
1748 (MVE_VBICimmi16 MQPR:$Qd, nImmSplatNotI16:$imm, vpred_n:$vp), 0>;
1749 def MVE_VANDimmi32 : MVEInstAlias<"vand${vp}.i32\t$Qd, $imm",
1750 (MVE_VBICimmi32 MQPR:$Qd, nImmSplatNotI32:$imm, vpred_n:$vp), 0>;
// A register-to-register vmov is encoded as vorr Qd, Qm, Qm.
1752 def MVE_VMOV : MVEInstAlias<"vmov${vp}\t$Qd, $Qm",
1753 (MVE_VORR MQPR:$Qd, MQPR:$Qm, MQPR:$Qm, vpred_r:$vp)>;
// Describes one direction of a move between a GPR and a vector lane; the
// two defs below fill in the operand lists and asm string for each
// direction. (Further fields of this class are not visible in this chunk.)
1755 class MVE_VMOV_lane_direction {
1762 def MVE_VMOV_from_lane : MVE_VMOV_lane_direction {
1764 let oops = (outs rGPR:$Rt);
1765 let iops = (ins MQPR:$Qd);
1766 let ops = "$Rt, $Qd$Idx";
1769 def MVE_VMOV_to_lane : MVE_VMOV_lane_direction {
1771 let oops = (outs MQPR:$Qd);
1772 let iops = (ins MQPR:$Qd_src, rGPR:$Rt);
1773 let ops = "$Qd$Idx, $Rt";
1774 let cstr = "$Qd = $Qd_src";
// Common encoding for vmov to/from a vector lane. The lane-index bits are
// set by the size-specific subclasses below, which spread $Idx across
// Inst{21}, Inst{16}, Inst{6} and Inst{5} depending on element width.
1777 class MVE_VMOV_lane<string suffix, bit U, dag indexop,
1778 MVE_VMOV_lane_direction dir>
1779 : MVE_VMOV_lane_base<dir.oops, !con(dir.iops, indexop), NoItinerary,
1780 "vmov", suffix, dir.ops, dir.cstr, []> {
1784 let Inst{31-24} = 0b11101110;
1786 let Inst{20} = dir.bit_20;
1787 let Inst{19-17} = Qd{2-0};
1788 let Inst{15-12} = Rt{3-0};
1789 let Inst{11-8} = 0b1011;
1790 let Inst{7} = Qd{3};
1791 let Inst{4-0} = 0b10000;
1793 let hasSideEffects = 0;
1796 class MVE_VMOV_lane_32<MVE_VMOV_lane_direction dir>
1797 : MVE_VMOV_lane<"32", 0b0, (ins MVEVectorIndex<4>:$Idx), dir> {
1800 let Inst{6-5} = 0b00;
1801 let Inst{16} = Idx{1};
1802 let Inst{21} = Idx{0};
1804 let Predicates = [HasFPRegsV8_1M];
1807 class MVE_VMOV_lane_16<string suffix, bit U, MVE_VMOV_lane_direction dir>
1808 : MVE_VMOV_lane<suffix, U, (ins MVEVectorIndex<8>:$Idx), dir> {
1812 let Inst{16} = Idx{2};
1813 let Inst{21} = Idx{1};
1814 let Inst{6} = Idx{0};
1817 class MVE_VMOV_lane_8<string suffix, bit U, MVE_VMOV_lane_direction dir>
1818 : MVE_VMOV_lane<suffix, U, (ins MVEVectorIndex<16>:$Idx), dir> {
1821 let Inst{16} = Idx{3};
1822 let Inst{21} = Idx{2};
1823 let Inst{6} = Idx{1};
1824 let Inst{5} = Idx{0};
// Concrete instructions: narrow from-lane moves exist in signed and
// unsigned (sign/zero-extending) variants; to-lane moves need only one.
1827 def MVE_VMOV_from_lane_32 : MVE_VMOV_lane_32< MVE_VMOV_from_lane>;
1828 def MVE_VMOV_from_lane_s16 : MVE_VMOV_lane_16<"s16", 0b0, MVE_VMOV_from_lane>;
1829 def MVE_VMOV_from_lane_u16 : MVE_VMOV_lane_16<"u16", 0b1, MVE_VMOV_from_lane>;
1830 def MVE_VMOV_from_lane_s8 : MVE_VMOV_lane_8 < "s8", 0b0, MVE_VMOV_from_lane>;
1831 def MVE_VMOV_from_lane_u8 : MVE_VMOV_lane_8 < "u8", 0b1, MVE_VMOV_from_lane>;
1832 let isInsertSubreg = 1 in
1833 def MVE_VMOV_to_lane_32 : MVE_VMOV_lane_32< MVE_VMOV_to_lane>;
1834 def MVE_VMOV_to_lane_16 : MVE_VMOV_lane_16< "16", 0b0, MVE_VMOV_to_lane>;
1835 def MVE_VMOV_to_lane_8 : MVE_VMOV_lane_8 < "8", 0b0, MVE_VMOV_to_lane>;
1837 // This is the same as insertelt but allows the inserted value to be an i32 as
1838 // will be used when it is the only legal type.
1839 def ARMVecInsert : SDTypeProfile<1, 3, [
1840 SDTCisVT<2, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<3>
1842 def ARMinsertelt : SDNode<"ISD::INSERT_VECTOR_ELT", ARMVecInsert>;
// Element insert/extract lowering: f64 lanes go via D subregisters,
// i32/f32 lanes via S subregisters or VMOV to/from lane, and narrow
// (i8/i16/f16) lanes via the lane-move instructions; odd f16 lanes use
// VINS/VMOVH since an S register covers a pair of f16 lanes.
1844 let Predicates = [HasMVEInt] in {
1845 def : Pat<(extractelt (v2f64 MQPR:$src), imm:$lane),
1846 (f64 (EXTRACT_SUBREG MQPR:$src, (DSubReg_f64_reg imm:$lane)))>;
1847 def : Pat<(insertelt (v2f64 MQPR:$src1), DPR:$src2, imm:$lane),
1848 (INSERT_SUBREG (v2f64 (COPY_TO_REGCLASS MQPR:$src1, MQPR)), DPR:$src2, (DSubReg_f64_reg imm:$lane))>;
1850 def : Pat<(extractelt (v4i32 MQPR:$src), imm:$lane),
1852 (i32 (EXTRACT_SUBREG MQPR:$src, (SSubReg_f32_reg imm:$lane))), rGPR)>;
1853 def : Pat<(insertelt (v4i32 MQPR:$src1), rGPR:$src2, imm:$lane),
1854 (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$src2, imm:$lane)>;
1855 // This tries to copy from one lane to another, without going via GPR regs
1856 def : Pat<(insertelt (v4i32 MQPR:$src1), (extractelt (v4i32 MQPR:$src2), imm:$extlane), imm:$inslane),
1857 (v4i32 (COPY_TO_REGCLASS
1858 (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4i32 MQPR:$src1), MQPR)),
1859 (f32 (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4i32 MQPR:$src2), MQPR)),
1860 (SSubReg_f32_reg imm:$extlane))),
1861 (SSubReg_f32_reg imm:$inslane)),
1864 def : Pat<(vector_insert (v16i8 MQPR:$src1), rGPR:$src2, imm:$lane),
1865 (MVE_VMOV_to_lane_8 MQPR:$src1, rGPR:$src2, imm:$lane)>;
1866 def : Pat<(vector_insert (v8i16 MQPR:$src1), rGPR:$src2, imm:$lane),
1867 (MVE_VMOV_to_lane_16 MQPR:$src1, rGPR:$src2, imm:$lane)>;
// Sign-extending (ARMvgetlanes) and zero-extending (ARMvgetlaneu) lane
// reads select the s/u variants of the from-lane moves respectively.
1869 def : Pat<(ARMvgetlanes (v16i8 MQPR:$src), imm:$lane),
1870 (MVE_VMOV_from_lane_s8 MQPR:$src, imm:$lane)>;
1871 def : Pat<(ARMvgetlanes (v8i16 MQPR:$src), imm:$lane),
1872 (MVE_VMOV_from_lane_s16 MQPR:$src, imm:$lane)>;
1873 def : Pat<(ARMvgetlanes (v8f16 MQPR:$src), imm:$lane),
1874 (MVE_VMOV_from_lane_s16 MQPR:$src, imm:$lane)>;
1875 def : Pat<(ARMvgetlaneu (v16i8 MQPR:$src), imm:$lane),
1876 (MVE_VMOV_from_lane_u8 MQPR:$src, imm:$lane)>;
1877 def : Pat<(ARMvgetlaneu (v8i16 MQPR:$src), imm:$lane),
1878 (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane)>;
1879 def : Pat<(ARMvgetlaneu (v8f16 MQPR:$src), imm:$lane),
1880 (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane)>;
1881 // For i16's inserts being extracted from low lanes, then may use VINS.
1882 def : Pat<(ARMinsertelt (v8i16 MQPR:$src1),
1883 (ARMvgetlaneu (v8i16 MQPR:$src2), imm_even:$extlane),
1885 (COPY_TO_REGCLASS (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS MQPR:$src1, MQPR)),
1886 (VINSH (EXTRACT_SUBREG MQPR:$src1, (SSubReg_f16_reg imm_odd:$inslane)),
1887 (EXTRACT_SUBREG MQPR:$src2, (SSubReg_f16_reg imm_even:$extlane))),
1888 (SSubReg_f16_reg imm_odd:$inslane)), MQPR)>;
// scalar_to_vector: insert into lane 0 of an undefined register.
1890 def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
1891 (MVE_VMOV_to_lane_8 (v16i8 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
1892 def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
1893 (MVE_VMOV_to_lane_16 (v8i16 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
1894 def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
1895 (MVE_VMOV_to_lane_32 (v4i32 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
1897 // Floating point patterns, still enabled under HasMVEInt
1898 def : Pat<(extractelt (v4f32 MQPR:$src), imm:$lane),
1899 (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG MQPR:$src, (SSubReg_f32_reg imm:$lane))), SPR)>;
1900 def : Pat<(insertelt (v4f32 MQPR:$src1), (f32 SPR:$src2), imm:$lane),
1901 (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS MQPR:$src1, MQPR)), SPR:$src2, (SSubReg_f32_reg imm:$lane))>;
1903 def : Pat<(insertelt (v8f16 MQPR:$src1), (f16 HPR:$src2), imm_even:$lane),
1904 (MVE_VMOV_to_lane_16 MQPR:$src1, (COPY_TO_REGCLASS (f16 HPR:$src2), rGPR), imm:$lane)>;
1905 def : Pat<(insertelt (v8f16 MQPR:$src1), (f16 HPR:$src2), imm_odd:$lane),
1906 (COPY_TO_REGCLASS (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS MQPR:$src1, MQPR)),
1907 (VINSH (EXTRACT_SUBREG MQPR:$src1, (SSubReg_f16_reg imm_odd:$lane)),
1908 (COPY_TO_REGCLASS HPR:$src2, SPR)),
1909 (SSubReg_f16_reg imm_odd:$lane)), MQPR)>;
1910 def : Pat<(extractelt (v8f16 MQPR:$src), imm_even:$lane),
1911 (EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_even:$lane))>;
1912 def : Pat<(extractelt (v8f16 MQPR:$src), imm_odd:$lane),
1914 (VMOVH (EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_odd:$lane))),
1917 def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
1918 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1919 def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
1920 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
1921 def : Pat<(v4f32 (scalar_to_vector GPR:$src)),
1922 (MVE_VMOV_to_lane_32 (v4f32 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
1923 def : Pat<(v8f16 (scalar_to_vector (f16 HPR:$src))),
1924 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), (f16 HPR:$src), ssub_0)>;
1925 def : Pat<(v8f16 (scalar_to_vector GPR:$src)),
1926 (MVE_VMOV_to_lane_16 (v8f16 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
1929 // end of mve_bit instructions
1931 // start of MVE Integer instructions
// Common base for three-operand integer instructions: Qd = op(Qn, Qm),
// with a 2-bit element-size field in Inst{21-20}.
1933 class MVE_int<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
1934 : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary,
1935 iname, suffix, "$Qd, $Qn, $Qm", vpred_r, "", pattern> {
1940 let Inst{22} = Qd{3};
1941 let Inst{21-20} = size;
1942 let Inst{19-17} = Qn{2-0};
1943 let Inst{15-13} = Qd{2-0};
1944 let Inst{7} = Qn{3};
1946 let Inst{5} = Qm{3};
1947 let Inst{3-1} = Qm{2-0};
// VMUL (integer, vector): encoding on top of MVE_int.
1950 class MVE_VMULt1<string iname, string suffix, bits<2> size,
1951 list<dag> pattern=[]>
1952 : MVE_int<iname, suffix, size, pattern> {
1955 let Inst{25-23} = 0b110;
1957 let Inst{12-8} = 0b01001;
1960 let validForTailPredication = 1;
// VMUL instruction plus isel patterns for the generic 'mul' node and the
// predicated multiply intrinsic.
1963 multiclass MVE_VMUL_m<MVEVectorVTInfo VTI> {
1964 def "" : MVE_VMULt1<"vmul", VTI.Suffix, VTI.Size>;
1966 let Predicates = [HasMVEInt] in {
1967 defm : MVE_TwoOpPattern<VTI, mul, int_arm_mve_mul_predicated, (? ),
1968 !cast<Instruction>(NAME), ARMimmOneV>;
1972 defm MVE_VMULi8 : MVE_VMUL_m<MVE_v16i8>;
1973 defm MVE_VMULi16 : MVE_VMUL_m<MVE_v8i16>;
1974 defm MVE_VMULi32 : MVE_VMUL_m<MVE_v4i32>;
// Saturating doubling multiply-high; 'rounding' (Inst{28}) selects
// VQRDMULH over VQDMULH.
1976 class MVE_VQxDMULH_Base<string iname, string suffix, bits<2> size, bit rounding,
1977 list<dag> pattern=[]>
1978 : MVE_int<iname, suffix, size, pattern> {
1980 let Inst{28} = rounding;
1981 let Inst{25-23} = 0b110;
1983 let Inst{12-8} = 0b01011;
1986 let validForTailPredication = 1;
1989 def MVEvqdmulh : SDNode<"ARMISD::VQDMULH", SDTIntBinOp>;
// Instruction plus patterns for Op (the target-independent/ARMISD node),
// the predicated intrinsic, and the unpredicated intrinsic.
1991 multiclass MVE_VQxDMULH_m<string iname, MVEVectorVTInfo VTI,
1992 SDNode Op, Intrinsic unpred_int, Intrinsic pred_int,
1994 def "" : MVE_VQxDMULH_Base<iname, VTI.Suffix, VTI.Size, rounding>;
1995 defvar Inst = !cast<Instruction>(NAME);
1997 let Predicates = [HasMVEInt] in {
1998 defm : MVE_TwoOpPattern<VTI, Op, pred_int, (? ), Inst>;
2000 // Extra unpredicated multiply intrinsic patterns
2001 def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
2002 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
// Convenience wrapper selecting the rounding/non-rounding intrinsics.
2006 multiclass MVE_VQxDMULH<string iname, MVEVectorVTInfo VTI, bit rounding>
2007 : MVE_VQxDMULH_m<iname, VTI, !if(rounding, null_frag,
2009 !if(rounding, int_arm_mve_vqrdmulh,
2010 int_arm_mve_vqdmulh),
2011 !if(rounding, int_arm_mve_qrdmulh_predicated,
2012 int_arm_mve_qdmulh_predicated),
2015 defm MVE_VQDMULHi8 : MVE_VQxDMULH<"vqdmulh", MVE_v16s8, 0b0>;
2016 defm MVE_VQDMULHi16 : MVE_VQxDMULH<"vqdmulh", MVE_v8s16, 0b0>;
2017 defm MVE_VQDMULHi32 : MVE_VQxDMULH<"vqdmulh", MVE_v4s32, 0b0>;
2019 defm MVE_VQRDMULHi8 : MVE_VQxDMULH<"vqrdmulh", MVE_v16s8, 0b1>;
2020 defm MVE_VQRDMULHi16 : MVE_VQxDMULH<"vqrdmulh", MVE_v8s16, 0b1>;
2021 defm MVE_VQRDMULHi32 : MVE_VQxDMULH<"vqrdmulh", MVE_v4s32, 0b1>;
// VADD/VSUB share an encoding; 'subtract' (Inst{28}) picks the operation.
2023 class MVE_VADDSUB<string iname, string suffix, bits<2> size, bit subtract,
2024 list<dag> pattern=[]>
2025 : MVE_int<iname, suffix, size, pattern> {
2027 let Inst{28} = subtract;
2028 let Inst{25-23} = 0b110;
2030 let Inst{12-8} = 0b01000;
2033 let validForTailPredication = 1;
2036 multiclass MVE_VADDSUB_m<string iname, MVEVectorVTInfo VTI, bit subtract,
2037 SDNode Op, Intrinsic PredInt> {
2038 def "" : MVE_VADDSUB<iname, VTI.Suffix, VTI.Size, subtract>;
2039 defvar Inst = !cast<Instruction>(NAME);
2041 let Predicates = [HasMVEInt] in {
2042 defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME), ARMimmAllZerosV>;
2046 multiclass MVE_VADD<MVEVectorVTInfo VTI>
2047 : MVE_VADDSUB_m<"vadd", VTI, 0b0, add, int_arm_mve_add_predicated>;
2048 multiclass MVE_VSUB<MVEVectorVTInfo VTI>
2049 : MVE_VADDSUB_m<"vsub", VTI, 0b1, sub, int_arm_mve_sub_predicated>;
2051 defm MVE_VADDi8 : MVE_VADD<MVE_v16i8>;
2052 defm MVE_VADDi16 : MVE_VADD<MVE_v8i16>;
2053 defm MVE_VADDi32 : MVE_VADD<MVE_v4i32>;
2055 defm MVE_VSUBi8 : MVE_VSUB<MVE_v16i8>;
2056 defm MVE_VSUBi16 : MVE_VSUB<MVE_v8i16>;
2057 defm MVE_VSUBi32 : MVE_VSUB<MVE_v4i32>;
// Saturating add/subtract; 'subtract' (Inst{9}) picks the operation.
2059 class MVE_VQADDSUB<string iname, string suffix, bit U, bit subtract,
2061 : MVE_int<iname, suffix, size, []> {
2064 let Inst{25-23} = 0b110;
2066 let Inst{12-10} = 0b000;
2067 let Inst{9} = subtract;
2071 let validForTailPredication = 1;
2074 class MVE_VQADD_<string suffix, bit U, bits<2> size>
2075 : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size>;
2076 class MVE_VQSUB_<string suffix, bit U, bits<2> size>
2077 : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size>;
// VQADD: the unsignedness flag is forwarded as an extra intrinsic operand.
2079 multiclass MVE_VQADD_m<MVEVectorVTInfo VTI,
2080 SDNode Op, Intrinsic PredInt> {
2081 def "" : MVE_VQADD_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
2082 defvar Inst = !cast<Instruction>(NAME);
2084 let Predicates = [HasMVEInt] in {
2085 defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)),
2086 !cast<Instruction>(NAME)>;
2090 multiclass MVE_VQADD<MVEVectorVTInfo VTI, SDNode unpred_op>
2091 : MVE_VQADD_m<VTI, unpred_op, int_arm_mve_qadd_predicated>;
2093 defm MVE_VQADDs8 : MVE_VQADD<MVE_v16s8, saddsat>;
2094 defm MVE_VQADDs16 : MVE_VQADD<MVE_v8s16, saddsat>;
2095 defm MVE_VQADDs32 : MVE_VQADD<MVE_v4s32, saddsat>;
2096 defm MVE_VQADDu8 : MVE_VQADD<MVE_v16u8, uaddsat>;
2097 defm MVE_VQADDu16 : MVE_VQADD<MVE_v8u16, uaddsat>;
2098 defm MVE_VQADDu32 : MVE_VQADD<MVE_v4u32, uaddsat>;
// VQSUB: same structure as VQADD above.
2100 multiclass MVE_VQSUB_m<MVEVectorVTInfo VTI,
2101 SDNode Op, Intrinsic PredInt> {
2102 def "" : MVE_VQSUB_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
2103 defvar Inst = !cast<Instruction>(NAME);
2105 let Predicates = [HasMVEInt] in {
2106 defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)),
2107 !cast<Instruction>(NAME)>;
2111 multiclass MVE_VQSUB<MVEVectorVTInfo VTI, SDNode unpred_op>
2112 : MVE_VQSUB_m<VTI, unpred_op, int_arm_mve_qsub_predicated>;
2114 defm MVE_VQSUBs8 : MVE_VQSUB<MVE_v16s8, ssubsat>;
2115 defm MVE_VQSUBs16 : MVE_VQSUB<MVE_v8s16, ssubsat>;
2116 defm MVE_VQSUBs32 : MVE_VQSUB<MVE_v4s32, ssubsat>;
2117 defm MVE_VQSUBu8 : MVE_VQSUB<MVE_v16u8, usubsat>;
2118 defm MVE_VQSUBu16 : MVE_VQSUB<MVE_v8u16, usubsat>;
2119 defm MVE_VQSUBu32 : MVE_VQSUB<MVE_v4u32, usubsat>;
// VABD: absolute difference of integer elements.
2121 class MVE_VABD_int<string suffix, bit U, bits<2> size,
2122 list<dag> pattern=[]>
2123 : MVE_int<"vabd", suffix, size, pattern> {
2126 let Inst{25-23} = 0b110;
2128 let Inst{12-8} = 0b00111;
2131 let validForTailPredication = 1;
// Instruction plus patterns for Op (abds/abdu), the predicated intrinsic,
// and the unpredicated intrinsic (which carries the unsignedness flag).
2134 multiclass MVE_VABD_m<MVEVectorVTInfo VTI, SDNode Op,
2135 Intrinsic unpred_int, Intrinsic PredInt> {
2136 def "" : MVE_VABD_int<VTI.Suffix, VTI.Unsigned, VTI.Size>;
2137 defvar Inst = !cast<Instruction>(NAME);
2139 let Predicates = [HasMVEInt] in {
2140 defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)),
2141 !cast<Instruction>(NAME)>;
2143 // Unpredicated absolute difference
2144 def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
2145 (i32 VTI.Unsigned))),
2146 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
2150 multiclass MVE_VABD<MVEVectorVTInfo VTI, SDNode Op>
2151 : MVE_VABD_m<VTI, Op, int_arm_mve_vabd, int_arm_mve_abd_predicated>;
2153 defm MVE_VABDs8 : MVE_VABD<MVE_v16s8, abds>;
2154 defm MVE_VABDs16 : MVE_VABD<MVE_v8s16, abds>;
2155 defm MVE_VABDs32 : MVE_VABD<MVE_v4s32, abds>;
2156 defm MVE_VABDu8 : MVE_VABD<MVE_v16u8, abdu>;
2157 defm MVE_VABDu16 : MVE_VABD<MVE_v8u16, abdu>;
2158 defm MVE_VABDu32 : MVE_VABD<MVE_v4u32, abdu>;
// VRHADD: rounding halving add.
2160 class MVE_VRHADD_Base<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
2161 : MVE_int<"vrhadd", suffix, size, pattern> {
2164 let Inst{25-23} = 0b110;
2166 let Inst{12-8} = 0b00001;
2169 let validForTailPredication = 1;
// add/sub PatFrags restricted to nodes carrying no-unsigned-wrap /
// no-signed-wrap flags; used below to match halving-add/sub idioms safely.
2172 def addnuw : PatFrag<(ops node:$lhs, node:$rhs),
2173 (add node:$lhs, node:$rhs), [{
2174 return N->getFlags().hasNoUnsignedWrap();
2177 def addnsw : PatFrag<(ops node:$lhs, node:$rhs),
2178 (add node:$lhs, node:$rhs), [{
2179 return N->getFlags().hasNoSignedWrap();
2182 def subnuw : PatFrag<(ops node:$lhs, node:$rhs),
2183 (sub node:$lhs, node:$rhs), [{
2184 return N->getFlags().hasNoUnsignedWrap();
2187 def subnsw : PatFrag<(ops node:$lhs, node:$rhs),
2188 (sub node:$lhs, node:$rhs), [{
2189 return N->getFlags().hasNoSignedWrap();
2192 multiclass MVE_VRHADD_m<MVEVectorVTInfo VTI,
2193 SDNode unpred_op, Intrinsic pred_int> {
2194 def "" : MVE_VRHADD_Base<VTI.Suffix, VTI.Unsigned, VTI.Size>;
2195 defvar Inst = !cast<Instruction>(NAME);
2197 let Predicates = [HasMVEInt] in {
2198 // Unpredicated rounding add-with-divide-by-two
2199 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
2200 (i32 VTI.Unsigned))),
2201 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
2203 // Predicated add-with-divide-by-two
2204 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
2205 (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
2206 (VTI.Vec MQPR:$inactive))),
2207 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
2208 ARMVCCThen, (VTI.Pred VCCR:$mask),
2209 (VTI.Vec MQPR:$inactive)))>;
2213 multiclass MVE_VRHADD<MVEVectorVTInfo VTI>
2214 : MVE_VRHADD_m<VTI, int_arm_mve_vrhadd, int_arm_mve_rhadd_predicated>;
2216 defm MVE_VRHADDs8 : MVE_VRHADD<MVE_v16s8>;
2217 defm MVE_VRHADDs16 : MVE_VRHADD<MVE_v8s16>;
2218 defm MVE_VRHADDs32 : MVE_VRHADD<MVE_v4s32>;
2219 defm MVE_VRHADDu8 : MVE_VRHADD<MVE_v16u8>;
2220 defm MVE_VRHADDu16 : MVE_VRHADD<MVE_v8u16>;
2221 defm MVE_VRHADDu32 : MVE_VRHADD<MVE_v4u32>;
2223 // Rounding Halving Add performs the arithmetic operation with an extra bit of
2224 // precision, before performing the shift, to avoid clipping errors. We're not
2225 // modelling that here with these patterns, but we're using no wrap forms of
2226 // add to ensure that the extra bit of information is not needed for the
2227 // arithmetic or the rounding.
// Match ((a + b + splat(1)) >> 1), with nsw/nuw adds, as a VRHADD. The
// ARMvmovImm constants below are encoded splat-of-1 vectors for each
// element width.
2228 let Predicates = [HasMVEInt] in {
2229 def : Pat<(v16i8 (ARMvshrsImm (addnsw (addnsw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
2230 (v16i8 (ARMvmovImm (i32 3585)))),
2232 (MVE_VRHADDs8 MQPR:$Qm, MQPR:$Qn)>;
2233 def : Pat<(v8i16 (ARMvshrsImm (addnsw (addnsw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
2234 (v8i16 (ARMvmovImm (i32 2049)))),
2236 (MVE_VRHADDs16 MQPR:$Qm, MQPR:$Qn)>;
2237 def : Pat<(v4i32 (ARMvshrsImm (addnsw (addnsw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
2238 (v4i32 (ARMvmovImm (i32 1)))),
2240 (MVE_VRHADDs32 MQPR:$Qm, MQPR:$Qn)>;
2241 def : Pat<(v16i8 (ARMvshruImm (addnuw (addnuw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
2242 (v16i8 (ARMvmovImm (i32 3585)))),
2244 (MVE_VRHADDu8 MQPR:$Qm, MQPR:$Qn)>;
2245 def : Pat<(v8i16 (ARMvshruImm (addnuw (addnuw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
2246 (v8i16 (ARMvmovImm (i32 2049)))),
2248 (MVE_VRHADDu16 MQPR:$Qm, MQPR:$Qn)>;
2249 def : Pat<(v4i32 (ARMvshruImm (addnuw (addnuw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
2250 (v4i32 (ARMvmovImm (i32 1)))),
2252 (MVE_VRHADDu32 MQPR:$Qm, MQPR:$Qn)>;
// Halving add/subtract; 'subtract' (Inst{9}) picks the operation.
2256 class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract,
2257 bits<2> size, list<dag> pattern=[]>
2258 : MVE_int<iname, suffix, size, pattern> {
2261 let Inst{25-23} = 0b110;
2263 let Inst{12-10} = 0b000;
2264 let Inst{9} = subtract;
2268 let validForTailPredication = 1;
2271 class MVE_VHADD_<string suffix, bit U, bits<2> size,
2272 list<dag> pattern=[]>
2273 : MVE_VHADDSUB<"vhadd", suffix, U, 0b0, size, pattern>;
2274 class MVE_VHSUB_<string suffix, bit U, bits<2> size,
2275 list<dag> pattern=[]>
2276 : MVE_VHADDSUB<"vhsub", suffix, U, 0b1, size, pattern>;
// VHADD instruction plus intrinsic patterns, and a pattern matching the
// generic ((a + b) >> 1) idiom via the no-wrap add/shift frags.
2278 multiclass MVE_VHADD_m<MVEVectorVTInfo VTI,
2279 SDNode unpred_op, Intrinsic pred_int, PatFrag add_op,
2281 def "" : MVE_VHADD_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
2282 defvar Inst = !cast<Instruction>(NAME);
2284 let Predicates = [HasMVEInt] in {
2285 // Unpredicated add-and-divide-by-two
2286 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned))),
2287 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
2289 def : Pat<(VTI.Vec (shift_op (add_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)), (i32 1))),
2290 (Inst MQPR:$Qm, MQPR:$Qn)>;
2292 // Predicated add-and-divide-by-two
2293 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned),
2294 (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
2295 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
2296 ARMVCCThen, (VTI.Pred VCCR:$mask),
2297 (VTI.Vec MQPR:$inactive)))>;
2301 multiclass MVE_VHADD<MVEVectorVTInfo VTI, PatFrag add_op, SDNode shift_op>
2302 : MVE_VHADD_m<VTI, int_arm_mve_vhadd, int_arm_mve_hadd_predicated, add_op,
2305 // Halving add/sub performs the arithmetic operation with an extra bit of
2306 // precision, before performing the shift, to avoid clipping errors. We're not
2307 // modelling that here with these patterns, but we're using no wrap forms of
2308 // add/sub to ensure that the extra bit of information is not needed.
2309 defm MVE_VHADDs8 : MVE_VHADD<MVE_v16s8, addnsw, ARMvshrsImm>;
2310 defm MVE_VHADDs16 : MVE_VHADD<MVE_v8s16, addnsw, ARMvshrsImm>;
2311 defm MVE_VHADDs32 : MVE_VHADD<MVE_v4s32, addnsw, ARMvshrsImm>;
2312 defm MVE_VHADDu8 : MVE_VHADD<MVE_v16u8, addnuw, ARMvshruImm>;
2313 defm MVE_VHADDu16 : MVE_VHADD<MVE_v8u16, addnuw, ARMvshruImm>;
2314 defm MVE_VHADDu32 : MVE_VHADD<MVE_v4u32, addnuw, ARMvshruImm>;
// VHSUB: same structure as VHADD above, with subtract-based frags.
2316 multiclass MVE_VHSUB_m<MVEVectorVTInfo VTI,
2317 SDNode unpred_op, Intrinsic pred_int, PatFrag sub_op,
2319 def "" : MVE_VHSUB_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
2320 defvar Inst = !cast<Instruction>(NAME);
2322 let Predicates = [HasMVEInt] in {
2323 // Unpredicated subtract-and-divide-by-two
2324 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
2325 (i32 VTI.Unsigned))),
2326 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
2328 def : Pat<(VTI.Vec (shift_op (sub_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)), (i32 1))),
2329 (Inst MQPR:$Qm, MQPR:$Qn)>;
2332 // Predicated subtract-and-divide-by-two
2333 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
2334 (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
2335 (VTI.Vec MQPR:$inactive))),
2336 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
2337 ARMVCCThen, (VTI.Pred VCCR:$mask),
2338 (VTI.Vec MQPR:$inactive)))>;
2342 multiclass MVE_VHSUB<MVEVectorVTInfo VTI, PatFrag sub_op, SDNode shift_op>
2343 : MVE_VHSUB_m<VTI, int_arm_mve_vhsub, int_arm_mve_hsub_predicated, sub_op,
2346 defm MVE_VHSUBs8 : MVE_VHSUB<MVE_v16s8, subnsw, ARMvshrsImm>;
2347 defm MVE_VHSUBs16 : MVE_VHSUB<MVE_v8s16, subnsw, ARMvshrsImm>;
2348 defm MVE_VHSUBs32 : MVE_VHSUB<MVE_v4s32, subnsw, ARMvshrsImm>;
2349 defm MVE_VHSUBu8 : MVE_VHSUB<MVE_v16u8, subnuw, ARMvshruImm>;
2350 defm MVE_VHSUBu16 : MVE_VHSUB<MVE_v8u16, subnuw, ARMvshruImm>;
2351 defm MVE_VHSUBu32 : MVE_VHSUB<MVE_v4u32, subnuw, ARMvshruImm>;
// VDUP: broadcast a GPR value into every lane of a vector register.
2353 class MVE_VDUP<string suffix, bit B, bit E, list<dag> pattern=[]>
2354 : MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary,
2355 "vdup", suffix, "$Qd, $Rt", vpred_r, "", pattern> {
2360 let Inst{25-23} = 0b101;
2362 let Inst{21-20} = 0b10;
2363 let Inst{19-17} = Qd{2-0};
2365 let Inst{15-12} = Rt;
2366 let Inst{11-8} = 0b1011;
2367 let Inst{7} = Qd{3};
2370 let Inst{4-0} = 0b10000;
2371 let validForTailPredication = 1;
2374 def MVE_VDUP32 : MVE_VDUP<"32", 0b0, 0b0>;
2375 def MVE_VDUP16 : MVE_VDUP<"16", 0b0, 0b1>;
2376 def MVE_VDUP8 : MVE_VDUP<"8", 0b1, 0b0>;
// Unpredicated splats; f16/f32 splats reuse the same width-matched VDUP,
// since the element value arrives in a GPR in both cases.
2378 let Predicates = [HasMVEInt] in {
2379 def : Pat<(v16i8 (ARMvdup (i32 rGPR:$elem))),
2380 (MVE_VDUP8 rGPR:$elem)>;
2381 def : Pat<(v8i16 (ARMvdup (i32 rGPR:$elem))),
2382 (MVE_VDUP16 rGPR:$elem)>;
2383 def : Pat<(v4i32 (ARMvdup (i32 rGPR:$elem))),
2384 (MVE_VDUP32 rGPR:$elem)>;
2386 def : Pat<(v8f16 (ARMvdup (i32 rGPR:$elem))),
2387 (MVE_VDUP16 rGPR:$elem)>;
2388 def : Pat<(v4f32 (ARMvdup (i32 rGPR:$elem))),
2389 (MVE_VDUP32 rGPR:$elem)>;
2391 // Match a vselect with an ARMvdup as a predicated MVE_VDUP
2392 def : Pat<(v16i8 (vselect (v16i1 VCCR:$pred),
2393 (v16i8 (ARMvdup (i32 rGPR:$elem))),
2394 (v16i8 MQPR:$inactive))),
2395 (MVE_VDUP8 rGPR:$elem, ARMVCCThen, (v16i1 VCCR:$pred),
2396 (v16i8 MQPR:$inactive))>;
2397 def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred),
2398 (v8i16 (ARMvdup (i32 rGPR:$elem))),
2399 (v8i16 MQPR:$inactive))),
2400 (MVE_VDUP16 rGPR:$elem, ARMVCCThen, (v8i1 VCCR:$pred),
2401 (v8i16 MQPR:$inactive))>;
2402 def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred),
2403 (v4i32 (ARMvdup (i32 rGPR:$elem))),
2404 (v4i32 MQPR:$inactive))),
2405 (MVE_VDUP32 rGPR:$elem, ARMVCCThen, (v4i1 VCCR:$pred),
2406 (v4i32 MQPR:$inactive))>;
2407 def : Pat<(v4f32 (vselect (v4i1 VCCR:$pred),
2408 (v4f32 (ARMvdup (i32 rGPR:$elem))),
2409 (v4f32 MQPR:$inactive))),
2410 (MVE_VDUP32 rGPR:$elem, ARMVCCThen, (v4i1 VCCR:$pred),
2411 (v4f32 MQPR:$inactive))>;
2412 def : Pat<(v8f16 (vselect (v8i1 VCCR:$pred),
2413 (v8f16 (ARMvdup (i32 rGPR:$elem))),
2414 (v8f16 MQPR:$inactive))),
2415 (MVE_VDUP16 rGPR:$elem, ARMVCCThen, (v8i1 VCCR:$pred),
2416 (v8f16 MQPR:$inactive))>;
// Common base for one-operand integer instructions: Qd = op(Qm), with a
// 2-bit element-size field in Inst{19-18}.
2420 class MVEIntSingleSrc<string iname, string suffix, bits<2> size,
2421 list<dag> pattern=[]>
2422 : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm), NoItinerary,
2423 iname, suffix, "$Qd, $Qm", vpred_r, "", pattern> {
2427 let Inst{22} = Qd{3};
2428 let Inst{19-18} = size{1-0};
2429 let Inst{15-13} = Qd{2-0};
2430 let Inst{5} = Qm{3};
2431 let Inst{3-1} = Qm{2-0};
// VCLS/VCLZ share an encoding; 'count_zeroes' (Inst{7}) selects VCLZ.
2434 class MVE_VCLSCLZ<string iname, string suffix, bits<2> size,
2435 bit count_zeroes, list<dag> pattern=[]>
2436 : MVEIntSingleSrc<iname, suffix, size, pattern> {
2439 let Inst{25-23} = 0b111;
2440 let Inst{21-20} = 0b11;
2441 let Inst{17-16} = 0b00;
2442 let Inst{12-8} = 0b00100;
2443 let Inst{7} = count_zeroes;
2447 let validForTailPredication = 1;
// Instruction plus patterns; the predicated intrinsic is looked up by name
// from 'opname' ("cls" or "clz").
2450 multiclass MVE_VCLSCLZ_p<string opname, bit opcode, MVEVectorVTInfo VTI,
2451 SDPatternOperator unpred_op> {
2452 def "": MVE_VCLSCLZ<"v"#opname, VTI.Suffix, VTI.Size, opcode>;
2454 defvar Inst = !cast<Instruction>(NAME);
2455 defvar pred_int = !cast<Intrinsic>("int_arm_mve_"#opname#"_predicated");
2457 let Predicates = [HasMVEInt] in {
2458 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$val))),
2459 (VTI.Vec (Inst (VTI.Vec MQPR:$val)))>;
2460 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$val), (VTI.Pred VCCR:$pred),
2461 (VTI.Vec MQPR:$inactive))),
2462 (VTI.Vec (Inst (VTI.Vec MQPR:$val), ARMVCCThen,
2463 (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
2467 defm MVE_VCLSs8 : MVE_VCLSCLZ_p<"cls", 0, MVE_v16s8, int_arm_mve_vcls>;
2468 defm MVE_VCLSs16 : MVE_VCLSCLZ_p<"cls", 0, MVE_v8s16, int_arm_mve_vcls>;
2469 defm MVE_VCLSs32 : MVE_VCLSCLZ_p<"cls", 0, MVE_v4s32, int_arm_mve_vcls>;
2471 defm MVE_VCLZs8 : MVE_VCLSCLZ_p<"clz", 1, MVE_v16i8, ctlz>;
2472 defm MVE_VCLZs16 : MVE_VCLSCLZ_p<"clz", 1, MVE_v8i16, ctlz>;
2473 defm MVE_VCLZs32 : MVE_VCLSCLZ_p<"clz", 1, MVE_v4i32, ctlz>;
// VABS/VNEG and their saturating variants VQABS/VQNEG share an encoding;
// 'negate' (Inst{7}) and 'saturate' (Inst{10}) select the variant.
2475 class MVE_VABSNEG_int<string iname, string suffix, bits<2> size, bit negate,
2476 bit saturate, list<dag> pattern=[]>
2477 : MVEIntSingleSrc<iname, suffix, size, pattern> {
2480 let Inst{25-23} = 0b111;
2481 let Inst{21-20} = 0b11;
2483 let Inst{16} = !eq(saturate, 0);
2484 let Inst{12-11} = 0b00;
2485 let Inst{10} = saturate;
2486 let Inst{9-8} = 0b11;
2487 let Inst{7} = negate;
2491 let validForTailPredication = 1;
2494 multiclass MVE_VABSNEG_int_m<string iname, bit negate, bit saturate,
2495 SDPatternOperator unpred_op, Intrinsic pred_int,
2496 MVEVectorVTInfo VTI> {
2497 def "" : MVE_VABSNEG_int<iname, VTI.Suffix, VTI.Size, negate, saturate>;
2498 defvar Inst = !cast<Instruction>(NAME);
2500 let Predicates = [HasMVEInt] in {
2501 // VQABS and VQNEG have more difficult isel patterns defined elsewhere
2502 if !not(saturate) then {
2503 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))),
2504 (VTI.Vec (Inst $v))>;
2507 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask),
2508 (VTI.Vec MQPR:$inactive))),
2509 (VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>;
// Instantiate abs/neg and their saturating variants for the signed types;
// the saturating forms pass '?' since their unpredicated patterns live in
// the vqabsneg_pattern multiclass below.
2513 foreach VTI = [ MVE_v16s8, MVE_v8s16, MVE_v4s32 ] in {
2514 defm "MVE_VABS" # VTI.Suffix : MVE_VABSNEG_int_m<
2515 "vabs", 0, 0, abs, int_arm_mve_abs_predicated, VTI>;
2516 defm "MVE_VQABS" # VTI.Suffix : MVE_VABSNEG_int_m<
2517 "vqabs", 0, 1, ?, int_arm_mve_qabs_predicated, VTI>;
2518 defm "MVE_VNEG" # VTI.Suffix : MVE_VABSNEG_int_m<
2519 "vneg", 1, 0, vnegq, int_arm_mve_neg_predicated, VTI>;
2520 defm "MVE_VQNEG" # VTI.Suffix : MVE_VABSNEG_int_m<
2521 "vqneg", 1, 1, ?, int_arm_mve_qneg_predicated, VTI>;
// Isel patterns matching the open-coded select/compare trees that express
// saturating absolute value and saturating negation, replacing each tree
// with a single VQABS/VQNEG instruction.
2524 // int_min/int_max: vector containing INT_MIN/INT_MAX VTI.Size times
2525 // zero_vec: v4i32-initialized zero vector, potentially wrapped in a bitconvert
2526 multiclass vqabsneg_pattern<MVEVectorVTInfo VTI, dag int_min, dag int_max,
2527 dag zero_vec, MVE_VABSNEG_int vqabs_instruction,
2528 MVE_VABSNEG_int vqneg_instruction> {
2529 let Predicates = [HasMVEInt] in {
2530 // The below tree can be replaced by a vqabs instruction, as it represents
2531 // the following vectorized expression (r being the value in $reg):
2532 // r > 0 ? r : (r == INT_MIN ? INT_MAX : -r)
2533 def : Pat<(VTI.Vec (vselect
2534 (VTI.Pred (ARMvcmpz (VTI.Vec MQPR:$reg), ARMCCgt)),
2535 (VTI.Vec MQPR:$reg),
2537 (VTI.Pred (ARMvcmp (VTI.Vec MQPR:$reg), int_min, ARMCCeq)),
2539 (sub (VTI.Vec zero_vec), (VTI.Vec MQPR:$reg)))))),
2540 (VTI.Vec (vqabs_instruction (VTI.Vec MQPR:$reg)))>;
2541 // Similarly, this tree represents vqneg, i.e. the following vectorized expression:
2542 // r == INT_MIN ? INT_MAX : -r
2543 def : Pat<(VTI.Vec (vselect
2544 (VTI.Pred (ARMvcmp (VTI.Vec MQPR:$reg), int_min, ARMCCeq)),
2546 (sub (VTI.Vec zero_vec), (VTI.Vec MQPR:$reg)))),
2547 (VTI.Vec (vqneg_instruction (VTI.Vec MQPR:$reg)))>;
// Instantiations of vqabsneg_pattern. The i32 magic numbers are NEON/MVE
// modified-immediate encodings (cmode<<8 | imm8):
//   3712 = 0xE80: vmov.i8  #0x80       -> INT8_MIN  splat
//   3711 = 0xE7F: vmov.i8  #0x7f       -> INT8_MAX  splat
//   2688 = 0xA80: vmov.i16 #0x8000     -> INT16_MIN splat (mvn -> 0x7fff = INT16_MAX)
//   1664 = 0x680: vmov.i32 #0x80000000 -> INT32_MIN splat (mvn -> INT32_MAX)
// The zero vector is built as v4i32 and bitconverted for the narrower types.
2551 defm MVE_VQABSNEG_Ps8 : vqabsneg_pattern<MVE_v16i8,
2552 (v16i8 (ARMvmovImm (i32 3712))),
2553 (v16i8 (ARMvmovImm (i32 3711))),
2554 (bitconvert (v4i32 (ARMvmovImm (i32 0)))),
2555 MVE_VQABSs8, MVE_VQNEGs8>;
2556 defm MVE_VQABSNEG_Ps16 : vqabsneg_pattern<MVE_v8i16,
2557 (v8i16 (ARMvmovImm (i32 2688))),
2558 (v8i16 (ARMvmvnImm (i32 2688))),
2559 (bitconvert (v4i32 (ARMvmovImm (i32 0)))),
2560 MVE_VQABSs16, MVE_VQNEGs16>;
2561 defm MVE_VQABSNEG_Ps32 : vqabsneg_pattern<MVE_v4i32,
2562 (v4i32 (ARMvmovImm (i32 1664))),
2563 (v4i32 (ARMvmvnImm (i32 1664))),
2564 (ARMvmovImm (i32 0)),
2565 MVE_VQABSs32, MVE_VQNEGs32>;
// Base class for MVE modified-immediate moves (VMOV/VMVN immediate).
// cmode selects the immediate expansion rule; op distinguishes the
// VMOV/VMVN (and i64) encodings. The 8-bit immediate is scattered across
// the encoding as imm{7}, imm{6-4} and imm{3-0}.
2567 class MVE_mod_imm<string iname, string suffix, bits<4> cmode, bit op,
2568 dag iops, list<dag> pattern=[]>
2569 : MVE_p<(outs MQPR:$Qd), iops, NoItinerary, iname, suffix, "$Qd, $imm",
2570 vpred_r, "", pattern> {
2574 let Inst{28} = imm{7};
2575 let Inst{25-23} = 0b111;
2576 let Inst{22} = Qd{3};
2577 let Inst{21-19} = 0b000;
2578 let Inst{18-16} = imm{6-4};
2579 let Inst{15-13} = Qd{2-0};
2581 let Inst{11-8} = cmode{3-0};
2582 let Inst{7-6} = 0b01;
2585 let Inst{3-0} = imm{3-0};
2587 let DecoderMethod = "DecodeMVEModImmInstruction";
2588 let validForTailPredication = 1;
// Concrete modified-immediate moves. All are rematerializable; the VMOV
// forms are additionally as-cheap-as-a-move. Where a cmode bit is left as
// '?' it is carried by the immediate operand instead, hence the per-def
// overrides of Inst{9} (i16) and Inst{11-8} (i32).
2591 let isReMaterializable = 1 in {
2592 let isAsCheapAsAMove = 1 in {
2593 def MVE_VMOVimmi8 : MVE_mod_imm<"vmov", "i8", {1,1,1,0}, 0b0, (ins nImmSplatI8:$imm)>;
2594 def MVE_VMOVimmi16 : MVE_mod_imm<"vmov", "i16", {1,0,?,0}, 0b0, (ins nImmSplatI16:$imm)> {
2595 let Inst{9} = imm{9};
2597 def MVE_VMOVimmi32 : MVE_mod_imm<"vmov", "i32", {?,?,?,?}, 0b0, (ins nImmVMOVI32:$imm)> {
2598 let Inst{11-8} = imm{11-8};
2600 def MVE_VMOVimmi64 : MVE_mod_imm<"vmov", "i64", {1,1,1,0}, 0b1, (ins nImmSplatI64:$imm)>;
2601 def MVE_VMOVimmf32 : MVE_mod_imm<"vmov", "f32", {1,1,1,1}, 0b0, (ins nImmVMOVF32:$imm)>;
2602 } // let isAsCheapAsAMove = 1
2604 def MVE_VMVNimmi16 : MVE_mod_imm<"vmvn", "i16", {1,0,?,0}, 0b1, (ins nImmSplatI16:$imm)> {
2605 let Inst{9} = imm{9};
2607 def MVE_VMVNimmi32 : MVE_mod_imm<"vmvn", "i32", {?,?,?,?}, 0b1, (ins nImmVMOVI32:$imm)> {
2608 let Inst{11-8} = imm{11-8};
2610 } // let isReMaterializable = 1
// Isel patterns mapping ARMvmovImm/ARMvmvnImm/ARMvmovFPImm DAG nodes onto
// the MVE modified-immediate move instructions, plus predicated (vselect)
// forms of VMVN immediate.
2612 let Predicates = [HasMVEInt] in {
2613 def : Pat<(v16i8 (ARMvmovImm timm:$simm)),
2614 (v16i8 (MVE_VMOVimmi8 nImmSplatI8:$simm))>;
2615 def : Pat<(v8i16 (ARMvmovImm timm:$simm)),
2616 (v8i16 (MVE_VMOVimmi16 nImmSplatI16:$simm))>;
2617 def : Pat<(v4i32 (ARMvmovImm timm:$simm)),
2618 (v4i32 (MVE_VMOVimmi32 nImmVMOVI32:$simm))>;
2619 def : Pat<(v2i64 (ARMvmovImm timm:$simm)),
2620 (v2i64 (MVE_VMOVimmi64 nImmSplatI64:$simm))>;
2622 def : Pat<(v8i16 (ARMvmvnImm timm:$simm)),
2623 (v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm))>;
2624 def : Pat<(v4i32 (ARMvmvnImm timm:$simm)),
2625 (v4i32 (MVE_VMVNimmi32 nImmVMOVI32:$simm))>;
2627 def : Pat<(v4f32 (ARMvmovFPImm timm:$simm)),
2628 (v4f32 (MVE_VMOVimmf32 nImmVMOVF32:$simm))>;
2630 def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (ARMvmvnImm timm:$simm),
2632 (v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm,
2633 ARMVCCThen, VCCR:$pred, MQPR:$inactive))>;
2634 def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (ARMvmvnImm timm:$simm),
// Use nImmVMOVI32 to match MVE_VMVNimmi32's declared operand class and the
// unpredicated pattern above (was nImmSplatI32, an inconsistent leftover).
2636 (v4i32 (MVE_VMVNimmi32 nImmVMOVI32:$simm,
2637 ARMVCCThen, VCCR:$pred, MQPR:$inactive))>;
// Base class for VMINA/VMAXA: min/max between $Qd and the absolute value of
// $Qm, accumulating into $Qd ($Qd is tied to $Qd_src). bit_12 selects
// between the two operations.
2640 class MVE_VMINMAXA<string iname, string suffix, bits<2> size,
2641 bit bit_12, list<dag> pattern=[]>
2642 : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
2643 NoItinerary, iname, suffix, "$Qd, $Qm", vpred_n, "$Qd = $Qd_src",
2649 let Inst{25-23} = 0b100;
2650 let Inst{22} = Qd{3};
2651 let Inst{21-20} = 0b11;
2652 let Inst{19-18} = size;
2653 let Inst{17-16} = 0b11;
2654 let Inst{15-13} = Qd{2-0};
2655 let Inst{12} = bit_12;
2656 let Inst{11-6} = 0b111010;
2657 let Inst{5} = Qm{3};
2659 let Inst{3-1} = Qm{2-0};
2661 let validForTailPredication = 1;
// Instruction + isel patterns for VMINA/VMAXA. The unpredicated pattern
// matches an unsigned min/max against abs($Qm): with one operand known
// non-negative, the unsigned compare gives the v(min|max)a semantics.
2664 multiclass MVE_VMINMAXA_m<string iname, MVEVectorVTInfo VTI,
2665 SDNode unpred_op, Intrinsic pred_int, bit bit_12> {
2666 def "" : MVE_VMINMAXA<iname, VTI.Suffix, VTI.Size, bit_12>;
2667 defvar Inst = !cast<Instruction>(NAME);
2669 let Predicates = [HasMVEInt] in {
2670 // Unpredicated v(min|max)a
2671 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qd), (abs (VTI.Vec MQPR:$Qm)))),
2672 (VTI.Vec (Inst (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm)))>;
2674 // Predicated v(min|max)a
2675 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm),
2676 (VTI.Pred VCCR:$mask))),
2677 (VTI.Vec (Inst (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm),
2678 ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
2682 multiclass MVE_VMINA<MVEVectorVTInfo VTI>
2683 : MVE_VMINMAXA_m<"vmina", VTI, umin, int_arm_mve_vmina_predicated, 0b1>;
2685 defm MVE_VMINAs8 : MVE_VMINA<MVE_v16s8>;
2686 defm MVE_VMINAs16 : MVE_VMINA<MVE_v8s16>;
2687 defm MVE_VMINAs32 : MVE_VMINA<MVE_v4s32>;
2689 multiclass MVE_VMAXA<MVEVectorVTInfo VTI>
2690 : MVE_VMINMAXA_m<"vmaxa", VTI, umax, int_arm_mve_vmaxa_predicated, 0b0>;
2692 defm MVE_VMAXAs8 : MVE_VMAXA<MVE_v16s8>;
2693 defm MVE_VMAXAs16 : MVE_VMAXA<MVE_v8s16>;
2694 defm MVE_VMAXAs32 : MVE_VMAXA<MVE_v4s32>;
2696 // end of MVE Integer instructions
2698 // start of mve_imm_shift instructions
// VSHLC: whole-vector shift left with carry through the scalar register
// RdmDest. Both the vector and the scalar are read-modify-write (note the
// two tied-operand constraints in the cstr string).
2700 def MVE_VSHLC : MVE_p<(outs rGPR:$RdmDest, MQPR:$Qd),
2701 (ins MQPR:$QdSrc, rGPR:$RdmSrc, long_shift:$imm),
2702 NoItinerary, "vshlc", "", "$QdSrc, $RdmSrc, $imm",
2703 vpred_n, "$RdmDest = $RdmSrc,$Qd = $QdSrc"> {
2709 let Inst{25-23} = 0b101;
2710 let Inst{22} = Qd{3};
2712 let Inst{20-16} = imm{4-0};
2713 let Inst{15-13} = Qd{2-0};
2714 let Inst{12-4} = 0b011111100;
2715 let Inst{3-0} = RdmDest{3-0};
// Common base for the immediate-shift family: supplies only the Qd/Qm
// register-number encoding bits shared by all of its subclasses.
2718 class MVE_shift_imm<dag oops, dag iops, string iname, string suffix,
2719 string ops, vpred_ops vpred, string cstr,
2720 list<dag> pattern=[]>
2721 : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
2725 let Inst{22} = Qd{3};
2726 let Inst{15-13} = Qd{2-0};
2727 let Inst{5} = Qm{3};
2728 let Inst{3-1} = Qm{2-0};
// VMOVL: widen the bottom ('b', top=0) or top ('t', top=1) half of each
// pair of lanes to double width, sign- or zero-extending per U.
2731 class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U, bit top,
2732 list<dag> pattern=[]>
2733 : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
2734 iname, suffix, "$Qd, $Qm", vpred_r, "",
2737 let Inst{25-23} = 0b101;
2739 let Inst{20-19} = sz{1-0};
2740 let Inst{18-16} = 0b000;
2742 let Inst{11-6} = 0b111101;
2745 let doubleWidthResult = 1;
// Instruction plus the predicated-intrinsic isel pattern; the unpredicated
// forms are matched by the sext_inreg/and patterns further down.
2748 multiclass MVE_VMOVL_m<bit top, string chr, MVEVectorVTInfo OutVTI,
2749 MVEVectorVTInfo InVTI> {
2750 def "": MVE_VMOVL<"vmovl" # chr, InVTI.Suffix, OutVTI.Size,
2751 InVTI.Unsigned, top>;
2752 defvar Inst = !cast<Instruction>(NAME);
2754 def : Pat<(OutVTI.Vec (int_arm_mve_vmovl_predicated (InVTI.Vec MQPR:$src),
2755 (i32 InVTI.Unsigned), (i32 top),
2756 (OutVTI.Pred VCCR:$pred),
2757 (OutVTI.Vec MQPR:$inactive))),
2758 (OutVTI.Vec (Inst (InVTI.Vec MQPR:$src), ARMVCCThen,
2759 (OutVTI.Pred VCCR:$pred),
2760 (OutVTI.Vec MQPR:$inactive)))>;
// NOTE(review): the u16 instantiations pass MVE_v4s32 as OutVTI where the
// u8 ones pass MVE_v8u16; OutVTI appears to contribute only Size and the
// (signedness-independent) Vec/Pred types here, so this looks harmless —
// confirm before "fixing".
2763 defm MVE_VMOVLs8bh : MVE_VMOVL_m<0, "b", MVE_v8s16, MVE_v16s8>;
2764 defm MVE_VMOVLs8th : MVE_VMOVL_m<1, "t", MVE_v8s16, MVE_v16s8>;
2765 defm MVE_VMOVLu8bh : MVE_VMOVL_m<0, "b", MVE_v8u16, MVE_v16u8>;
2766 defm MVE_VMOVLu8th : MVE_VMOVL_m<1, "t", MVE_v8u16, MVE_v16u8>;
2767 defm MVE_VMOVLs16bh : MVE_VMOVL_m<0, "b", MVE_v4s32, MVE_v8s16>;
2768 defm MVE_VMOVLs16th : MVE_VMOVL_m<1, "t", MVE_v4s32, MVE_v8s16>;
2769 defm MVE_VMOVLu16bh : MVE_VMOVL_m<0, "b", MVE_v4s32, MVE_v8u16>;
2770 defm MVE_VMOVLu16th : MVE_VMOVL_m<1, "t", MVE_v4s32, MVE_v8u16>;
2772 let Predicates = [HasMVEInt] in {
2773 def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i16),
2774 (MVE_VMOVLs16bh MQPR:$src)>;
2775 def : Pat<(sext_inreg (v8i16 MQPR:$src), v8i8),
2776 (MVE_VMOVLs8bh MQPR:$src)>;
// 8 -> 32 sign extension needs two chained widenings.
2777 def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i8),
2778 (MVE_VMOVLs16bh (MVE_VMOVLs8bh MQPR:$src))>;
// A vrev followed by a bottom-half extend is the same as extending the
// top half directly.
2780 def : Pat<(sext_inreg (v8i16 (ARMVectorRegCast (ARMvrev16 (v16i8 MQPR:$src)))), v8i8),
2781 (MVE_VMOVLs8th MQPR:$src)>;
2782 def : Pat<(sext_inreg (v4i32 (ARMVectorRegCast (ARMvrev32 (v8i16 MQPR:$src)))), v4i16),
2783 (MVE_VMOVLs16th MQPR:$src)>;
2785 // zext_inreg 8 -> 16
// 0xAFF is the modified-immediate encoding of vbic.i16 #0xff00, i.e.
// clearing the high byte of each halfword.
2786 def : Pat<(ARMvbicImm (v8i16 MQPR:$src), (i32 0xAFF)),
2787 (MVE_VMOVLu8bh MQPR:$src)>;
2788 // zext_inreg 16 -> 32
// 0xCFF is the modified-immediate encoding of the 0x0000ffff splat.
2789 def : Pat<(and (v4i32 MQPR:$src), (v4i32 (ARMvmovImm (i32 0xCFF)))),
2790 (MVE_VMOVLu16bh MQPR:$src)>;
2791 // Same zext_inreg with vrevs, picking the top half
2792 def : Pat<(ARMvbicImm (v8i16 (ARMVectorRegCast (ARMvrev16 (v16i8 MQPR:$src)))), (i32 0xAFF)),
2793 (MVE_VMOVLu8th MQPR:$src)>;
2794 def : Pat<(and (v4i32 (ARMVectorRegCast (ARMvrev32 (v8i16 MQPR:$src)))),
2795 (v4i32 (ARMvmovImm (i32 0xCFF)))),
2796 (MVE_VMOVLu16th MQPR:$src)>;
// VSHLL with an explicit immediate shift count: widen bottom/top half
// (per th) and shift left. The shift range excludes the full lane width,
// which has its own encoding (MVE_VSHLL_by_lane_width below).
2800 class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
2801 Operand immtype, list<dag> pattern=[]>
2802 : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm, immtype:$imm),
2803 iname, suffix, "$Qd, $Qm, $imm", vpred_r, "", pattern> {
2805 let Inst{25-23} = 0b101;
2808 let Inst{11-6} = 0b111101;
2812 // For the MVE_VSHLL_patterns multiclass to refer to
2813 Operand immediateType = immtype;
2815 let doubleWidthResult = 1;
2818 // The immediate VSHLL instructions accept shift counts from 1 up to
2819 // the lane width (8 or 16), but the full-width shifts have an
2820 // entirely separate encoding, given below with 'lw' in the name.
// 8-bit lanes: 3-bit immediate field, counts 1..7.
2822 class MVE_VSHLL_imm8<string iname, string suffix,
2823 bit U, bit th, list<dag> pattern=[]>
2824 : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_7, pattern> {
2826 let Inst{20-19} = 0b01;
2827 let Inst{18-16} = imm;
// 16-bit lanes: 4-bit immediate field, counts 1..15.
2830 class MVE_VSHLL_imm16<string iname, string suffix,
2831 bit U, bit th, list<dag> pattern=[]>
2832 : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_15, pattern> {
2835 let Inst{19-16} = imm;
2838 def MVE_VSHLL_imms8bh : MVE_VSHLL_imm8 <"vshllb", "s8", 0b0, 0b0>;
2839 def MVE_VSHLL_imms8th : MVE_VSHLL_imm8 <"vshllt", "s8", 0b0, 0b1>;
2840 def MVE_VSHLL_immu8bh : MVE_VSHLL_imm8 <"vshllb", "u8", 0b1, 0b0>;
2841 def MVE_VSHLL_immu8th : MVE_VSHLL_imm8 <"vshllt", "u8", 0b1, 0b1>;
2842 def MVE_VSHLL_imms16bh : MVE_VSHLL_imm16<"vshllb", "s16", 0b0, 0b0>;
2843 def MVE_VSHLL_imms16th : MVE_VSHLL_imm16<"vshllt", "s16", 0b0, 0b1>;
2844 def MVE_VSHLL_immu16bh : MVE_VSHLL_imm16<"vshllb", "u16", 0b1, 0b0>;
2845 def MVE_VSHLL_immu16th : MVE_VSHLL_imm16<"vshllt", "u16", 0b1, 0b1>;
// The separate encoding of VSHLL with shift count equal to the lane width
// (#8 or #16). The count is implied by the encoding, so the instruction
// takes no immediate operand; the asm string carries the fixed "#N".
2847 class MVE_VSHLL_by_lane_width<string iname, string suffix, bits<2> size,
2848 bit U, string ops, list<dag> pattern=[]>
2849 : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
2850 iname, suffix, ops, vpred_r, "", pattern> {
2852 let Inst{25-23} = 0b100;
2853 let Inst{21-20} = 0b11;
2854 let Inst{19-18} = size{1-0};
2855 let Inst{17-16} = 0b01;
2856 let Inst{11-6} = 0b111000;
2859 let doubleWidthResult = 1;
// Bottom-half and top-half variants of the lane-width shift.
2862 multiclass MVE_VSHLL_lw<string iname, string suffix, bits<2> sz, bit U,
2863 string ops, list<dag> pattern=[]> {
2864 def bh : MVE_VSHLL_by_lane_width<iname#"b", suffix, sz, U, ops, pattern> {
2867 def th : MVE_VSHLL_by_lane_width<iname#"t", suffix, sz, U, ops, pattern> {
2872 defm MVE_VSHLL_lws8 : MVE_VSHLL_lw<"vshll", "s8", 0b00, 0b0, "$Qd, $Qm, #8">;
2873 defm MVE_VSHLL_lws16 : MVE_VSHLL_lw<"vshll", "s16", 0b01, 0b0, "$Qd, $Qm, #16">;
2874 defm MVE_VSHLL_lwu8 : MVE_VSHLL_lw<"vshll", "u8", 0b00, 0b1, "$Qd, $Qm, #8">;
2875 defm MVE_VSHLL_lwu16 : MVE_VSHLL_lw<"vshll", "u16", 0b01, 0b1, "$Qd, $Qm, #16">;
// Isel patterns for the VSHLL intrinsics. A shift count in the operand's
// legal range selects the immediate form; a count equal to the lane width
// selects the separate lane-width encoding. Covers both the unpredicated
// and predicated intrinsics.
2877 multiclass MVE_VSHLL_patterns<MVEVectorVTInfo VTI, int top> {
// Resolve instruction names like "MVE_VSHLL_imms8th" from suffix + b/t half.
2878 defvar suffix = !strconcat(VTI.Suffix, !if(top, "th", "bh"));
2879 defvar inst_imm = !cast<MVE_VSHLL_imm>("MVE_VSHLL_imm" # suffix);
2880 defvar inst_lw = !cast<MVE_VSHLL_by_lane_width>("MVE_VSHLL_lw" # suffix);
2881 defvar unpred_int = int_arm_mve_vshll_imm;
2882 defvar pred_int = int_arm_mve_vshll_imm_predicated;
2883 defvar imm = inst_imm.immediateType;
2885 def : Pat<(VTI.DblVec (unpred_int (VTI.Vec MQPR:$src), imm:$imm,
2886 (i32 VTI.Unsigned), (i32 top))),
2887 (VTI.DblVec (inst_imm (VTI.Vec MQPR:$src), imm:$imm))>;
2888 def : Pat<(VTI.DblVec (unpred_int (VTI.Vec MQPR:$src), (i32 VTI.LaneBits),
2889 (i32 VTI.Unsigned), (i32 top))),
2890 (VTI.DblVec (inst_lw (VTI.Vec MQPR:$src)))>;
2892 def : Pat<(VTI.DblVec (pred_int (VTI.Vec MQPR:$src), imm:$imm,
2893 (i32 VTI.Unsigned), (i32 top),
2894 (VTI.DblPred VCCR:$mask),
2895 (VTI.DblVec MQPR:$inactive))),
2896 (VTI.DblVec (inst_imm (VTI.Vec MQPR:$src), imm:$imm,
2897 ARMVCCThen, (VTI.DblPred VCCR:$mask),
2898 (VTI.DblVec MQPR:$inactive)))>;
2899 def : Pat<(VTI.DblVec (pred_int (VTI.Vec MQPR:$src), (i32 VTI.LaneBits),
2900 (i32 VTI.Unsigned), (i32 top),
2901 (VTI.DblPred VCCR:$mask),
2902 (VTI.DblVec MQPR:$inactive))),
2903 (VTI.DblVec (inst_lw (VTI.Vec MQPR:$src), ARMVCCThen,
2904 (VTI.DblPred VCCR:$mask),
2905 (VTI.DblVec MQPR:$inactive)))>;
2908 foreach VTI = [MVE_v16s8, MVE_v8s16, MVE_v16u8, MVE_v8u16] in
2909 foreach top = [0, 1] in
2910 defm : MVE_VSHLL_patterns<VTI, top>;
// Base for narrowing shifts that write only half of $Qd and so must tie
// $Qd to $QdSrc to retain the other half.
2912 class MVE_shift_imm_partial<Operand imm, string iname, string suffix>
2913 : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$QdSrc, MQPR:$Qm, imm:$imm),
2914 iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc"> {
// Exposed so pattern multiclasses can reuse the exact immediate operand.
2915 Operand immediateType = imm;
// VSHRN/VRSHRN family: shift right, narrow into bottom (bit_12=0) or top
// (bit_12=1) half; bit_28 selects the rounding variant.
2918 class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
2919 Operand imm, list<dag> pattern=[]>
2920 : MVE_shift_imm_partial<imm, iname, suffix> {
2923 let Inst{28} = bit_28;
2924 let Inst{25-23} = 0b101;
2926 let Inst{20-16} = imm{4-0};
2927 let Inst{12} = bit_12;
2928 let Inst{11-6} = 0b111111;
2931 let validForTailPredication = 1;
2932 let retainsPreviousHalfElement = 1;
2935 def MVE_VRSHRNi16bh : MVE_VxSHRN<"vrshrnb", "i16", 0b0, 0b1, shr_imm8> {
2936 let Inst{20-19} = 0b01;
2938 def MVE_VRSHRNi16th : MVE_VxSHRN<"vrshrnt", "i16", 0b1, 0b1, shr_imm8> {
2939 let Inst{20-19} = 0b01;
2941 def MVE_VRSHRNi32bh : MVE_VxSHRN<"vrshrnb", "i32", 0b0, 0b1, shr_imm16> {
2944 def MVE_VRSHRNi32th : MVE_VxSHRN<"vrshrnt", "i32", 0b1, 0b1, shr_imm16> {
2948 def MVE_VSHRNi16bh : MVE_VxSHRN<"vshrnb", "i16", 0b0, 0b0, shr_imm8> {
2949 let Inst{20-19} = 0b01;
2951 def MVE_VSHRNi16th : MVE_VxSHRN<"vshrnt", "i16", 0b1, 0b0, shr_imm8> {
2952 let Inst{20-19} = 0b01;
2954 def MVE_VSHRNi32bh : MVE_VxSHRN<"vshrnb", "i32", 0b0, 0b0, shr_imm16> {
2957 def MVE_VSHRNi32th : MVE_VxSHRN<"vshrnt", "i32", 0b1, 0b0, shr_imm16> {
// VQSHRUN/VQRSHRUN family: saturating shift right of a signed source,
// narrowing into an unsigned result. bit_28 selects rounding, bit_12
// selects bottom/top half.
2961 class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12,
2962 Operand imm, list<dag> pattern=[]>
2963 : MVE_shift_imm_partial<imm, iname, suffix> {
2966 let Inst{28} = bit_28;
2967 let Inst{25-23} = 0b101;
2969 let Inst{20-16} = imm{4-0};
2970 let Inst{12} = bit_12;
2971 let Inst{11-6} = 0b111111;
2974 let validForTailPredication = 1;
2975 let retainsPreviousHalfElement = 1;
2978 def MVE_VQRSHRUNs16bh : MVE_VxQRSHRUN<
2979 "vqrshrunb", "s16", 0b1, 0b0, shr_imm8> {
2980 let Inst{20-19} = 0b01;
2982 def MVE_VQRSHRUNs16th : MVE_VxQRSHRUN<
2983 "vqrshrunt", "s16", 0b1, 0b1, shr_imm8> {
2984 let Inst{20-19} = 0b01;
2986 def MVE_VQRSHRUNs32bh : MVE_VxQRSHRUN<
2987 "vqrshrunb", "s32", 0b1, 0b0, shr_imm16> {
2990 def MVE_VQRSHRUNs32th : MVE_VxQRSHRUN<
2991 "vqrshrunt", "s32", 0b1, 0b1, shr_imm16> {
2995 def MVE_VQSHRUNs16bh : MVE_VxQRSHRUN<
2996 "vqshrunb", "s16", 0b0, 0b0, shr_imm8> {
2997 let Inst{20-19} = 0b01;
2999 def MVE_VQSHRUNs16th : MVE_VxQRSHRUN<
3000 "vqshrunt", "s16", 0b0, 0b1, shr_imm8> {
3001 let Inst{20-19} = 0b01;
3003 def MVE_VQSHRUNs32bh : MVE_VxQRSHRUN<
3004 "vqshrunb", "s32", 0b0, 0b0, shr_imm16> {
3007 def MVE_VQSHRUNs32th : MVE_VxQRSHRUN<
3008 "vqshrunt", "s32", 0b0, 0b1, shr_imm16> {
// VQSHRN/VQRSHRN family: saturating shift right and narrow, keeping the
// source signedness. bit_0 distinguishes rounding vs non-rounding,
// bit_12 bottom vs top half.
3012 class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
3013 Operand imm, list<dag> pattern=[]>
3014 : MVE_shift_imm_partial<imm, iname, suffix> {
3017 let Inst{25-23} = 0b101;
3019 let Inst{20-16} = imm{4-0};
3020 let Inst{12} = bit_12;
3021 let Inst{11-6} = 0b111101;
3023 let Inst{0} = bit_0;
3024 let validForTailPredication = 1;
3025 let retainsPreviousHalfElement = 1;
// Instantiate one instruction per signed/unsigned x 16/32-bit source type.
3028 multiclass MVE_VxQRSHRN_types<string iname, bit bit_0, bit bit_12> {
3029 def s16 : MVE_VxQRSHRN<iname, "s16", bit_0, bit_12, shr_imm8> {
3031 let Inst{20-19} = 0b01;
3033 def u16 : MVE_VxQRSHRN<iname, "u16", bit_0, bit_12, shr_imm8> {
3035 let Inst{20-19} = 0b01;
3037 def s32 : MVE_VxQRSHRN<iname, "s32", bit_0, bit_12, shr_imm16> {
3041 def u32 : MVE_VxQRSHRN<iname, "u32", bit_0, bit_12, shr_imm16> {
3047 defm MVE_VQRSHRNbh : MVE_VxQRSHRN_types<"vqrshrnb", 0b1, 0b0>;
3048 defm MVE_VQRSHRNth : MVE_VxQRSHRN_types<"vqrshrnt", 0b1, 0b1>;
3049 defm MVE_VQSHRNbh : MVE_VxQRSHRN_types<"vqshrnb", 0b0, 0b0>;
3050 defm MVE_VQSHRNth : MVE_VxQRSHRN_types<"vqshrnt", 0b0, 0b1>;
// Isel patterns for the whole narrowing-shift family. The operand list is
// built once as a dag (inparams) and retargeted onto the unpredicated and
// predicated vshrn intrinsics via !setdagop/!con; q/r/top encode the
// saturating, rounding and top-half flags the intrinsic carries.
3052 multiclass MVE_VSHRN_patterns<MVE_shift_imm_partial inst,
3053 MVEVectorVTInfo OutVTI, MVEVectorVTInfo InVTI,
3054 bit q, bit r, bit top> {
3055 defvar inparams = (? (OutVTI.Vec MQPR:$QdSrc), (InVTI.Vec MQPR:$Qm),
3056 (inst.immediateType:$imm), (i32 q), (i32 r),
3057 (i32 OutVTI.Unsigned), (i32 InVTI.Unsigned), (i32 top));
3058 defvar outparams = (inst (OutVTI.Vec MQPR:$QdSrc), (InVTI.Vec MQPR:$Qm),
3061 def : Pat<(OutVTI.Vec !setdagop(inparams, int_arm_mve_vshrn)),
3062 (OutVTI.Vec outparams)>;
3063 def : Pat<(OutVTI.Vec !con(inparams, (int_arm_mve_vshrn_predicated
3064 (InVTI.Pred VCCR:$pred)))),
3065 (OutVTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred)))>;
// One instantiation per instruction/type/flag combination.
3068 defm : MVE_VSHRN_patterns<MVE_VSHRNi16bh, MVE_v16s8, MVE_v8s16, 0,0,0>;
3069 defm : MVE_VSHRN_patterns<MVE_VSHRNi16th, MVE_v16s8, MVE_v8s16, 0,0,1>;
3070 defm : MVE_VSHRN_patterns<MVE_VSHRNi32bh, MVE_v8s16, MVE_v4s32, 0,0,0>;
3071 defm : MVE_VSHRN_patterns<MVE_VSHRNi32th, MVE_v8s16, MVE_v4s32, 0,0,1>;
3072 defm : MVE_VSHRN_patterns<MVE_VSHRNi16bh, MVE_v16u8, MVE_v8u16, 0,0,0>;
3073 defm : MVE_VSHRN_patterns<MVE_VSHRNi16th, MVE_v16u8, MVE_v8u16, 0,0,1>;
3074 defm : MVE_VSHRN_patterns<MVE_VSHRNi32bh, MVE_v8u16, MVE_v4u32, 0,0,0>;
3075 defm : MVE_VSHRN_patterns<MVE_VSHRNi32th, MVE_v8u16, MVE_v4u32, 0,0,1>;
3076 defm : MVE_VSHRN_patterns<MVE_VRSHRNi16bh, MVE_v16s8, MVE_v8s16, 0,1,0>;
3077 defm : MVE_VSHRN_patterns<MVE_VRSHRNi16th, MVE_v16s8, MVE_v8s16, 0,1,1>;
3078 defm : MVE_VSHRN_patterns<MVE_VRSHRNi32bh, MVE_v8s16, MVE_v4s32, 0,1,0>;
3079 defm : MVE_VSHRN_patterns<MVE_VRSHRNi32th, MVE_v8s16, MVE_v4s32, 0,1,1>;
3080 defm : MVE_VSHRN_patterns<MVE_VRSHRNi16bh, MVE_v16u8, MVE_v8u16, 0,1,0>;
3081 defm : MVE_VSHRN_patterns<MVE_VRSHRNi16th, MVE_v16u8, MVE_v8u16, 0,1,1>;
3082 defm : MVE_VSHRN_patterns<MVE_VRSHRNi32bh, MVE_v8u16, MVE_v4u32, 0,1,0>;
3083 defm : MVE_VSHRN_patterns<MVE_VRSHRNi32th, MVE_v8u16, MVE_v4u32, 0,1,1>;
3084 defm : MVE_VSHRN_patterns<MVE_VQSHRNbhs16, MVE_v16s8, MVE_v8s16, 1,0,0>;
3085 defm : MVE_VSHRN_patterns<MVE_VQSHRNths16, MVE_v16s8, MVE_v8s16, 1,0,1>;
3086 defm : MVE_VSHRN_patterns<MVE_VQSHRNbhs32, MVE_v8s16, MVE_v4s32, 1,0,0>;
3087 defm : MVE_VSHRN_patterns<MVE_VQSHRNths32, MVE_v8s16, MVE_v4s32, 1,0,1>;
3088 defm : MVE_VSHRN_patterns<MVE_VQSHRNbhu16, MVE_v16u8, MVE_v8u16, 1,0,0>;
3089 defm : MVE_VSHRN_patterns<MVE_VQSHRNthu16, MVE_v16u8, MVE_v8u16, 1,0,1>;
3090 defm : MVE_VSHRN_patterns<MVE_VQSHRNbhu32, MVE_v8u16, MVE_v4u32, 1,0,0>;
3091 defm : MVE_VSHRN_patterns<MVE_VQSHRNthu32, MVE_v8u16, MVE_v4u32, 1,0,1>;
3092 defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhs16, MVE_v16s8, MVE_v8s16, 1,1,0>;
3093 defm : MVE_VSHRN_patterns<MVE_VQRSHRNths16, MVE_v16s8, MVE_v8s16, 1,1,1>;
3094 defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhs32, MVE_v8s16, MVE_v4s32, 1,1,0>;
3095 defm : MVE_VSHRN_patterns<MVE_VQRSHRNths32, MVE_v8s16, MVE_v4s32, 1,1,1>;
3096 defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhu16, MVE_v16u8, MVE_v8u16, 1,1,0>;
3097 defm : MVE_VSHRN_patterns<MVE_VQRSHRNthu16, MVE_v16u8, MVE_v8u16, 1,1,1>;
3098 defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhu32, MVE_v8u16, MVE_v4u32, 1,1,0>;
3099 defm : MVE_VSHRN_patterns<MVE_VQRSHRNthu32, MVE_v8u16, MVE_v4u32, 1,1,1>;
3100 defm : MVE_VSHRN_patterns<MVE_VQSHRUNs16bh, MVE_v16u8, MVE_v8s16, 1,0,0>;
3101 defm : MVE_VSHRN_patterns<MVE_VQSHRUNs16th, MVE_v16u8, MVE_v8s16, 1,0,1>;
3102 defm : MVE_VSHRN_patterns<MVE_VQSHRUNs32bh, MVE_v8u16, MVE_v4s32, 1,0,0>;
3103 defm : MVE_VSHRN_patterns<MVE_VQSHRUNs32th, MVE_v8u16, MVE_v4s32, 1,0,1>;
3104 defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs16bh, MVE_v16u8, MVE_v8s16, 1,1,0>;
3105 defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs16th, MVE_v16u8, MVE_v8s16, 1,1,1>;
3106 defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs32bh, MVE_v8u16, MVE_v4s32, 1,1,0>;
3107 defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs32th, MVE_v8u16, MVE_v4s32, 1,1,1>;
3109 // end of mve_imm_shift instructions
3111 // start of mve_shift instructions
// VSHL/VQSHL/VRSHL/VQRSHL with per-lane shift counts taken from $Qn.
// bit_4 selects saturating, bit_8 selects rounding behaviour (per the
// instantiations in mve_shift_by_vec_multi below).
3113 class MVE_shift_by_vec<string iname, string suffix, bit U,
3114 bits<2> size, bit bit_4, bit bit_8>
3115 : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm, MQPR:$Qn), NoItinerary,
3116 iname, suffix, "$Qd, $Qm, $Qn", vpred_r, "", []> {
3117 // Shift instructions which take a vector of shift counts
3123 let Inst{25-24} = 0b11;
3125 let Inst{22} = Qd{3};
3126 let Inst{21-20} = size;
3127 let Inst{19-17} = Qn{2-0};
3129 let Inst{15-13} = Qd{2-0};
3130 let Inst{12-9} = 0b0010;
3131 let Inst{8} = bit_8;
3132 let Inst{7} = Qn{3};
3134 let Inst{5} = Qm{3};
3135 let Inst{4} = bit_4;
3136 let Inst{3-1} = Qm{2-0};
3138 let validForTailPredication = 1;
// Instruction + intrinsic patterns for one element type; q/r are passed
// through to the generic vshl_vector intrinsics to select the variant.
3141 multiclass MVE_shift_by_vec_p<string iname, MVEVectorVTInfo VTI, bit q, bit r> {
3142 def "" : MVE_shift_by_vec<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, q, r>;
3143 defvar Inst = !cast<Instruction>(NAME);
3145 def : Pat<(VTI.Vec (int_arm_mve_vshl_vector
3146 (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
3147 (i32 q), (i32 r), (i32 VTI.Unsigned))),
3148 (VTI.Vec (Inst (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh)))>;
3150 def : Pat<(VTI.Vec (int_arm_mve_vshl_vector_predicated
3151 (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
3152 (i32 q), (i32 r), (i32 VTI.Unsigned),
3153 (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
3154 (VTI.Vec (Inst (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
3155 ARMVCCThen, (VTI.Pred VCCR:$mask),
3156 (VTI.Vec MQPR:$inactive)))>;
// Expand one shift-by-vector mnemonic across all six integer types.
3159 multiclass mve_shift_by_vec_multi<string iname, bit bit_4, bit bit_8> {
3160 defm s8 : MVE_shift_by_vec_p<iname, MVE_v16s8, bit_4, bit_8>;
3161 defm s16 : MVE_shift_by_vec_p<iname, MVE_v8s16, bit_4, bit_8>;
3162 defm s32 : MVE_shift_by_vec_p<iname, MVE_v4s32, bit_4, bit_8>;
3163 defm u8 : MVE_shift_by_vec_p<iname, MVE_v16u8, bit_4, bit_8>;
3164 defm u16 : MVE_shift_by_vec_p<iname, MVE_v8u16, bit_4, bit_8>;
3165 defm u32 : MVE_shift_by_vec_p<iname, MVE_v4u32, bit_4, bit_8>;
3168 defm MVE_VSHL_by_vec : mve_shift_by_vec_multi<"vshl", 0b0, 0b0>;
3169 defm MVE_VQSHL_by_vec : mve_shift_by_vec_multi<"vqshl", 0b1, 0b0>;
3170 defm MVE_VQRSHL_by_vec : mve_shift_by_vec_multi<"vqrshl", 0b1, 0b1>;
3171 defm MVE_VRSHL_by_vec : mve_shift_by_vec_multi<"vrshl", 0b0, 0b1>;
// Plain ISD-level vector shifts select the non-saturating, non-rounding
// VSHL; signedness of the shift picks the s/u instruction variant.
3173 let Predicates = [HasMVEInt] in {
3174 def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))),
3175 (v4i32 (MVE_VSHL_by_vecu32 (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>;
3176 def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn))),
3177 (v8i16 (MVE_VSHL_by_vecu16 (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)))>;
3178 def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))),
3179 (v16i8 (MVE_VSHL_by_vecu8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>;
3181 def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))),
3182 (v4i32 (MVE_VSHL_by_vecs32 (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>;
3183 def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn))),
3184 (v8i16 (MVE_VSHL_by_vecs16 (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)))>;
3185 def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))),
3186 (v16i8 (MVE_VSHL_by_vecs8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>;
// Common base for full-width immediate shifts. Besides the shared encoding
// bits it declares fields (VTI, immediateType, unpred_int/pred_int via
// lets at the instantiation sites, unsignedFlag) that the generic
// MVE_shift_imm_patterns multiclass reads back off each instruction record.
3189 class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops,
3190 string ops, vpred_ops vpred, string cstr,
3191 list<dag> pattern=[]>
3192 : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
3197 let Inst{22} = Qd{3};
3198 let Inst{15-13} = Qd{2-0};
3199 let Inst{12-11} = 0b00;
3200 let Inst{7-6} = 0b01;
3201 let Inst{5} = Qm{3};
3203 let Inst{3-1} = Qm{2-0};
3205 let validForTailPredication = 1;
3207 // For the MVE_shift_imm_patterns multiclass to refer to
3208 MVEVectorVTInfo VTI;
3209 Operand immediateType;
3210 Intrinsic unpred_int;
// Empty by default; signed/unsigned variants append (i32 VTI.Unsigned).
3212 dag unsignedFlag = (?);
// VSRI/VSLI: shift-and-insert, merging shifted $Qm into $Qd under the
// shifted-in bit positions ($Qd tied to $Qd_src). bit_8 selects VSLI.
3215 class MVE_VSxI_imm<string iname, string suffix, bit bit_8, Operand immType>
3216 : MVE_shift_with_imm<iname, suffix, (outs MQPR:$Qd),
3217 (ins MQPR:$Qd_src, MQPR:$Qm, immType:$imm),
3218 "$Qd, $Qm, $imm", vpred_n, "$Qd = $Qd_src"> {
3221 let Inst{25-24} = 0b11;
3222 let Inst{21-16} = imm;
3223 let Inst{10-9} = 0b10;
3224 let Inst{8} = bit_8;
3225 let validForTailPredication = 1;
3227 Operand immediateType = immType;
// Per-size defs: the upper bits of the imm field double as the size
// encoding, hence the fixed Inst{21-19}/Inst{21-20} prefixes.
3230 def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, shr_imm8> {
3231 let Inst{21-19} = 0b001;
3234 def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, shr_imm16> {
3235 let Inst{21-20} = 0b01;
3238 def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, shr_imm32> {
3242 def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, imm0_7> {
3243 let Inst{21-19} = 0b001;
3246 def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, imm0_15> {
3247 let Inst{21-20} = 0b01;
3250 def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1,imm0_31> {
// Intrinsic patterns; the intrinsic name is derived from the mnemonic.
3254 multiclass MVE_VSxI_patterns<MVE_VSxI_imm inst, string name,
3255 MVEVectorVTInfo VTI> {
3256 defvar inparams = (? (VTI.Vec MQPR:$QdSrc), (VTI.Vec MQPR:$Qm),
3257 (inst.immediateType:$imm));
3258 defvar outparams = (inst (VTI.Vec MQPR:$QdSrc), (VTI.Vec MQPR:$Qm),
3259 (inst.immediateType:$imm));
3260 defvar unpred_int = !cast<Intrinsic>("int_arm_mve_" # name);
3261 defvar pred_int = !cast<Intrinsic>("int_arm_mve_" # name # "_predicated");
3263 def : Pat<(VTI.Vec !setdagop(inparams, unpred_int)),
3264 (VTI.Vec outparams)>;
3265 def : Pat<(VTI.Vec !con(inparams, (pred_int (VTI.Pred VCCR:$pred)))),
3266 (VTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred)))>;
3269 defm : MVE_VSxI_patterns<MVE_VSLIimm8, "vsli", MVE_v16i8>;
3270 defm : MVE_VSxI_patterns<MVE_VSLIimm16, "vsli", MVE_v8i16>;
3271 defm : MVE_VSxI_patterns<MVE_VSLIimm32, "vsli", MVE_v4i32>;
3272 defm : MVE_VSxI_patterns<MVE_VSRIimm8, "vsri", MVE_v16i8>;
3273 defm : MVE_VSxI_patterns<MVE_VSRIimm16, "vsri", MVE_v8i16>;
3274 defm : MVE_VSxI_patterns<MVE_VSRIimm32, "vsri", MVE_v4i32>;
// VQSHL (immediate): saturating left shift. Fills in the metadata fields
// (immediateType, unsignedFlag) consumed by MVE_shift_imm_patterns.
3276 class MVE_VQSHL_imm<MVEVectorVTInfo VTI_, Operand immType>
3277 : MVE_shift_with_imm<"vqshl", VTI_.Suffix, (outs MQPR:$Qd),
3278 (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
3282 let Inst{28} = VTI_.Unsigned;
3283 let Inst{25-24} = 0b11;
3284 let Inst{21-16} = imm;
3285 let Inst{10-8} = 0b111;
3288 let immediateType = immType;
3289 let unsignedFlag = (? (i32 VTI.Unsigned));
// The intrinsic hooks are bound here so the generic pattern multiclass
// can read them off each def.
3292 let unpred_int = int_arm_mve_vqshl_imm,
3293 pred_int = int_arm_mve_vqshl_imm_predicated in {
3294 def MVE_VQSHLimms8 : MVE_VQSHL_imm<MVE_v16s8, imm0_7> {
3295 let Inst{21-19} = 0b001;
3297 def MVE_VQSHLimmu8 : MVE_VQSHL_imm<MVE_v16u8, imm0_7> {
3298 let Inst{21-19} = 0b001;
3301 def MVE_VQSHLimms16 : MVE_VQSHL_imm<MVE_v8s16, imm0_15> {
3302 let Inst{21-20} = 0b01;
3304 def MVE_VQSHLimmu16 : MVE_VQSHL_imm<MVE_v8u16, imm0_15> {
3305 let Inst{21-20} = 0b01;
3308 def MVE_VQSHLimms32 : MVE_VQSHL_imm<MVE_v4s32, imm0_31> {
3311 def MVE_VQSHLimmu32 : MVE_VQSHL_imm<MVE_v4u32, imm0_31> {
// VQSHLU: saturating left shift of a signed input to an unsigned result.
// Signed-input only, so no unsignedFlag is appended.
3316 class MVE_VQSHLU_imm<MVEVectorVTInfo VTI_, Operand immType>
3317 : MVE_shift_with_imm<"vqshlu", VTI_.Suffix, (outs MQPR:$Qd),
3318 (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
3323 let Inst{25-24} = 0b11;
3324 let Inst{21-16} = imm;
3325 let Inst{10-8} = 0b110;
3328 let immediateType = immType;
3331 let unpred_int = int_arm_mve_vqshlu_imm,
3332 pred_int = int_arm_mve_vqshlu_imm_predicated in {
3333 def MVE_VQSHLU_imms8 : MVE_VQSHLU_imm<MVE_v16s8, imm0_7> {
3334 let Inst{21-19} = 0b001;
3337 def MVE_VQSHLU_imms16 : MVE_VQSHLU_imm<MVE_v8s16, imm0_15> {
3338 let Inst{21-20} = 0b01;
3341 def MVE_VQSHLU_imms32 : MVE_VQSHLU_imm<MVE_v4s32, imm0_31> {
// VRSHR (immediate): rounding right shift; shares the metadata-field
// scheme used by VQSHL above for the generic pattern multiclass.
3346 class MVE_VRSHR_imm<MVEVectorVTInfo VTI_, Operand immType>
3347 : MVE_shift_with_imm<"vrshr", VTI_.Suffix, (outs MQPR:$Qd),
3348 (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
3352 let Inst{28} = VTI_.Unsigned;
3353 let Inst{25-24} = 0b11;
3354 let Inst{21-16} = imm;
3355 let Inst{10-8} = 0b010;
3358 let immediateType = immType;
3359 let unsignedFlag = (? (i32 VTI.Unsigned));
3362 let unpred_int = int_arm_mve_vrshr_imm,
3363 pred_int = int_arm_mve_vrshr_imm_predicated in {
3364 def MVE_VRSHR_imms8 : MVE_VRSHR_imm<MVE_v16s8, shr_imm8> {
3365 let Inst{21-19} = 0b001;
3368 def MVE_VRSHR_immu8 : MVE_VRSHR_imm<MVE_v16u8, shr_imm8> {
3369 let Inst{21-19} = 0b001;
3372 def MVE_VRSHR_imms16 : MVE_VRSHR_imm<MVE_v8s16, shr_imm16> {
3373 let Inst{21-20} = 0b01;
3376 def MVE_VRSHR_immu16 : MVE_VRSHR_imm<MVE_v8u16, shr_imm16> {
3377 let Inst{21-20} = 0b01;
3380 def MVE_VRSHR_imms32 : MVE_VRSHR_imm<MVE_v4s32, shr_imm32> {
3384 def MVE_VRSHR_immu32 : MVE_VRSHR_imm<MVE_v4u32, shr_imm32> {
// Generic intrinsic patterns for VQSHL/VQSHLU/VRSHR: everything needed
// (value type, immediate operand, intrinsics, optional unsigned flag) is
// read back off the instruction record's declared fields.
3389 multiclass MVE_shift_imm_patterns<MVE_shift_with_imm inst> {
3390 def : Pat<(inst.VTI.Vec !con((inst.unpred_int (inst.VTI.Vec MQPR:$src),
3391 inst.immediateType:$imm),
3392 inst.unsignedFlag)),
3393 (inst.VTI.Vec (inst (inst.VTI.Vec MQPR:$src),
3394 inst.immediateType:$imm))>;
3396 def : Pat<(inst.VTI.Vec !con((inst.pred_int (inst.VTI.Vec MQPR:$src),
3397 inst.immediateType:$imm),
3399 (? (inst.VTI.Pred VCCR:$mask),
3400 (inst.VTI.Vec MQPR:$inactive)))),
3401 (inst.VTI.Vec (inst (inst.VTI.Vec MQPR:$src),
3402 inst.immediateType:$imm,
3403 ARMVCCThen, (inst.VTI.Pred VCCR:$mask),
3404 (inst.VTI.Vec MQPR:$inactive)))>;
3407 defm : MVE_shift_imm_patterns<MVE_VQSHLimms8>;
3408 defm : MVE_shift_imm_patterns<MVE_VQSHLimmu8>;
3409 defm : MVE_shift_imm_patterns<MVE_VQSHLimms16>;
3410 defm : MVE_shift_imm_patterns<MVE_VQSHLimmu16>;
3411 defm : MVE_shift_imm_patterns<MVE_VQSHLimms32>;
3412 defm : MVE_shift_imm_patterns<MVE_VQSHLimmu32>;
3413 defm : MVE_shift_imm_patterns<MVE_VQSHLU_imms8>;
3414 defm : MVE_shift_imm_patterns<MVE_VQSHLU_imms16>;
3415 defm : MVE_shift_imm_patterns<MVE_VQSHLU_imms32>;
3416 defm : MVE_shift_imm_patterns<MVE_VRSHR_imms8>;
3417 defm : MVE_shift_imm_patterns<MVE_VRSHR_immu8>;
3418 defm : MVE_shift_imm_patterns<MVE_VRSHR_imms16>;
3419 defm : MVE_shift_imm_patterns<MVE_VRSHR_immu16>;
3420 defm : MVE_shift_imm_patterns<MVE_VRSHR_imms32>;
3421 defm : MVE_shift_imm_patterns<MVE_VRSHR_immu32>;
// Plain VSHR (immediate right shift); the immediate arrives as a dag so
// each per-size def can pick its own operand class.
3423 class MVE_VSHR_imm<string suffix, dag imm>
3424 : MVE_shift_with_imm<"vshr", suffix, (outs MQPR:$Qd),
3425 !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
3429 let Inst{25-24} = 0b11;
3430 let Inst{21-16} = imm;
3431 let Inst{10-8} = 0b000;
3434 def MVE_VSHR_imms8 : MVE_VSHR_imm<"s8", (ins shr_imm8:$imm)> {
3436 let Inst{21-19} = 0b001;
3439 def MVE_VSHR_immu8 : MVE_VSHR_imm<"u8", (ins shr_imm8:$imm)> {
3441 let Inst{21-19} = 0b001;
3444 def MVE_VSHR_imms16 : MVE_VSHR_imm<"s16", (ins shr_imm16:$imm)> {
3446 let Inst{21-20} = 0b01;
3449 def MVE_VSHR_immu16 : MVE_VSHR_imm<"u16", (ins shr_imm16:$imm)> {
3451 let Inst{21-20} = 0b01;
3454 def MVE_VSHR_imms32 : MVE_VSHR_imm<"s32", (ins shr_imm32:$imm)> {
3459 def MVE_VSHR_immu32 : MVE_VSHR_imm<"u32", (ins shr_imm32:$imm)> {
// Plain VSHL (immediate left shift); size-independent mnemonic "i" types
// since left shift does not care about signedness.
3464 class MVE_VSHL_imm<string suffix, dag imm>
3465 : MVE_shift_with_imm<"vshl", suffix, (outs MQPR:$Qd),
3466 !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
3471 let Inst{25-24} = 0b11;
3472 let Inst{21-16} = imm;
3473 let Inst{10-8} = 0b101;
3476 def MVE_VSHL_immi8 : MVE_VSHL_imm<"i8", (ins imm0_7:$imm)> {
3477 let Inst{21-19} = 0b001;
3480 def MVE_VSHL_immi16 : MVE_VSHL_imm<"i16", (ins imm0_15:$imm)> {
3481 let Inst{21-20} = 0b01;
3484 def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm)> {
// Patterns mapping the target-independent shift-by-immediate nodes and
// the shl/shr predicated intrinsics onto VSHL/VSHR. unsignedFlag, when
// non-empty, is spliced into the intrinsic's operand list via !dag so one
// predicated intrinsic can serve both the signed and unsigned VSHR.
3488 multiclass MVE_immediate_shift_patterns_inner<
3489 MVEVectorVTInfo VTI, Operand imm_operand_type, SDNode unpred_op,
3490 Intrinsic pred_int, Instruction inst, list<int> unsignedFlag = []> {
3492 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$src), imm_operand_type:$imm)),
3493 (VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm))>;
3495 def : Pat<(VTI.Vec !con((pred_int (VTI.Vec MQPR:$src), imm_operand_type:$imm),
3496 !dag(pred_int, unsignedFlag, ?),
3497 (pred_int (VTI.Pred VCCR:$mask),
3498 (VTI.Vec MQPR:$inactive)))),
3499 (VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm,
3500 ARMVCCThen, (VTI.Pred VCCR:$mask),
3501 (VTI.Vec MQPR:$inactive)))>;
// One left-shift and two right-shift (unsigned [1] / signed [0]) variants
// per element type.
3504 multiclass MVE_immediate_shift_patterns<MVEVectorVTInfo VTI,
3505 Operand imm_operand_type> {
3506 defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
3507 ARMvshlImm, int_arm_mve_shl_imm_predicated,
3508 !cast<Instruction>("MVE_VSHL_immi" # VTI.BitsSuffix)>;
3509 defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
3510 ARMvshruImm, int_arm_mve_shr_imm_predicated,
3511 !cast<Instruction>("MVE_VSHR_immu" # VTI.BitsSuffix), [1]>;
3512 defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
3513 ARMvshrsImm, int_arm_mve_shr_imm_predicated,
3514 !cast<Instruction>("MVE_VSHR_imms" # VTI.BitsSuffix), [0]>;
3517 let Predicates = [HasMVEInt] in {
3518 defm : MVE_immediate_shift_patterns<MVE_v16i8, imm0_7>;
3519 defm : MVE_immediate_shift_patterns<MVE_v8i16, imm0_15>;
3520 defm : MVE_immediate_shift_patterns<MVE_v4i32, imm0_31>;
3523 // end of mve_shift instructions
3525 // start of MVE Floating Point instructions
// Common base for MVE floating-point instructions: forwards everything to
// MVE_f and encodes the Qm source register bits shared by the whole group.
3527 class MVE_float<string iname, string suffix, dag oops, dag iops, string ops,
3528 vpred_ops vpred, string cstr, list<dag> pattern=[]>
3529 : MVE_f<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
3534 let Inst{5} = Qm{3};
3535 let Inst{3-1} = Qm{2-0};
// VRINT<rmode>: float-to-integral rounding within the FP format. `rmode`
// is the rounding-mode letter appended to the mnemonic ("vrintn", "vrintx",
// ...); `op` selects the mode in bits 9-7 and `size` the lane width.
3539 class MVE_VRINT<string rmode, bits<3> op, string suffix, bits<2> size,
3540 list<dag> pattern=[]>
3541 : MVE_float<!strconcat("vrint", rmode), suffix, (outs MQPR:$Qd),
3542 (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
3546 let Inst{25-23} = 0b111;
3547 let Inst{22} = Qd{3};
3548 let Inst{21-20} = 0b11;
3549 let Inst{19-18} = size;
3550 let Inst{17-16} = 0b10;
3551 let Inst{15-13} = Qd{2-0};
3552 let Inst{11-10} = 0b01;
3553 let Inst{9-7} = op{2-0};
3555 let validForTailPredication = 1;
// One rounding mode: emit the instruction, then (under HasMVEFloat) an
// unpredicated pattern for the generic ISD node and a predicated pattern
// for the int_arm_mve_vrint<mode>_predicated intrinsic.
3559 multiclass MVE_VRINT_m<MVEVectorVTInfo VTI, string suffix, bits<3> opcode,
3560 SDPatternOperator unpred_op> {
3561 def "": MVE_VRINT<suffix, opcode, VTI.Suffix, VTI.Size>;
3562 defvar Inst = !cast<Instruction>(NAME);
3563 defvar pred_int = !cast<Intrinsic>("int_arm_mve_vrint"#suffix#"_predicated");
3565 let Predicates = [HasMVEFloat] in {
3566 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$val))),
3567 (VTI.Vec (Inst (VTI.Vec MQPR:$val)))>;
3568 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$val), (VTI.Pred VCCR:$pred),
3569 (VTI.Vec MQPR:$inactive))),
3570 (VTI.Vec (Inst (VTI.Vec MQPR:$val), ARMVCCThen,
3571 (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
// All six rounding modes for one vector type. N (to-nearest-even via the
// target intrinsic), X (frint), A (fround), Z (ftrunc), M (ffloor),
// P (fceil). Note opcodes 0b100 and 0b110 are intentionally unused here.
3575 multiclass MVE_VRINT_ops<MVEVectorVTInfo VTI> {
3576 defm N : MVE_VRINT_m<VTI, "n", 0b000, int_arm_mve_vrintn>;
3577 defm X : MVE_VRINT_m<VTI, "x", 0b001, frint>;
3578 defm A : MVE_VRINT_m<VTI, "a", 0b010, fround>;
3579 defm Z : MVE_VRINT_m<VTI, "z", 0b011, ftrunc>;
3580 defm M : MVE_VRINT_m<VTI, "m", 0b101, ffloor>;
3581 defm P : MVE_VRINT_m<VTI, "p", 0b111, fceil>;
3584 defm MVE_VRINTf16 : MVE_VRINT_ops<MVE_v8f16>;
3585 defm MVE_VRINTf32 : MVE_VRINT_ops<MVE_v4f32>;
// Base for NEON-encoding-style MVE FP arithmetic: bit 20 carries the
// f16-vs-f32 size bit on top of the common MVE_float encoding.
3587 class MVEFloatArithNeon<string iname, string suffix, bit size,
3588 dag oops, dag iops, string ops,
3589 vpred_ops vpred, string cstr, list<dag> pattern=[]>
3590 : MVE_float<iname, suffix, oops, iops, ops, vpred, cstr, pattern> {
3591 let Inst{20} = size;
// VMUL (floating point): Qd = Qn * Qm.
3595 class MVE_VMUL_fp<string iname, string suffix, bit size, list<dag> pattern=[]>
3596 : MVEFloatArithNeon<iname, suffix, size, (outs MQPR:$Qd),
3597 (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm", vpred_r, "",
3603 let Inst{25-23} = 0b110;
3604 let Inst{22} = Qd{3};
3606 let Inst{19-17} = Qn{2-0};
3607 let Inst{15-13} = Qd{2-0};
3608 let Inst{12-8} = 0b01101;
3609 let Inst{7} = Qn{3};
3611 let validForTailPredication = 1;
// Instruction plus the shared two-operand (un)predicated patterns via
// MVE_TwoOpPattern. NOTE(review): the bit_21 parameter is accepted but not
// referenced in the visible lines -- possibly used in a line missing from
// this dump; confirm against the full file.
3614 multiclass MVE_VMULT_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
3615 SDNode Op, Intrinsic PredInt> {
3616 def "" : MVE_VMUL_fp<iname, VTI.Suffix, VTI.Size{0}>;
3617 defvar Inst = !cast<Instruction>(NAME);
3619 let Predicates = [HasMVEFloat] in {
3620 defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME)>;
3624 multiclass MVE_VMUL_fp_m<MVEVectorVTInfo VTI>
3625 : MVE_VMULT_fp_m<"vmul", 0, VTI, fmul, int_arm_mve_mul_predicated>;
3627 defm MVE_VMULf32 : MVE_VMUL_fp_m<MVE_v4f32>;
3628 defm MVE_VMULf16 : MVE_VMUL_fp_m<MVE_v8f16>;
// VCMLA: complex multiply-accumulate. Accumulates into $Qd (tied to
// $Qd_src) with a rotation operand encoded in bits 24-23.
3630 class MVE_VCMLA<string suffix, bit size>
3631 : MVEFloatArithNeon<"vcmla", suffix, size, (outs MQPR:$Qd),
3632 (ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
3633 "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", []> {
3640 let Inst{24-23} = rot;
3641 let Inst{22} = Qd{3};
3643 let Inst{19-17} = Qn{2-0};
3644 let Inst{15-13} = Qd{2-0};
3645 let Inst{12-8} = 0b01000;
3646 let Inst{7} = Qn{3};
// Patterns for the vcmlaq intrinsics. The intrinsic takes the rotation
// first; the instruction takes it after the vector operands.
3650 multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI, bit size> {
3651 def "" : MVE_VCMLA<VTI.Suffix, size>;
3652 defvar Inst = !cast<Instruction>(NAME);
3654 let Predicates = [HasMVEFloat] in {
3655 def : Pat<(VTI.Vec (int_arm_mve_vcmlaq
3656 imm:$rot, (VTI.Vec MQPR:$Qd_src),
3657 (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
3658 (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
3659 (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
3662 def : Pat<(VTI.Vec (int_arm_mve_vcmlaq_predicated
3663 imm:$rot, (VTI.Vec MQPR:$Qd_src),
3664 (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
3665 (VTI.Pred VCCR:$mask))),
3666 (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qn),
3667 (VTI.Vec MQPR:$Qm), imm:$rot,
3668 ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
3673 defm MVE_VCMLAf16 : MVE_VCMLA_m<MVE_v8f16, 0b0>;
3674 defm MVE_VCMLAf32 : MVE_VCMLA_m<MVE_v4f32, 0b1>;
// Shared encoding class for FP VADD, VSUB, VFMA and VFMS: the individual
// opcodes differ only in bits 4, 8 and 21 and in whether an extra tied
// accumulator input (iops) is present.
3676 class MVE_VADDSUBFMA_fp<string iname, string suffix, bit size, bit bit_4,
3677 bit bit_8, bit bit_21, dag iops=(ins),
3678 vpred_ops vpred=vpred_r, string cstr="",
3679 list<dag> pattern=[]>
3680 : MVEFloatArithNeon<iname, suffix, size, (outs MQPR:$Qd),
3681 !con(iops, (ins MQPR:$Qn, MQPR:$Qm)), "$Qd, $Qn, $Qm",
3682 vpred, cstr, pattern> {
3687 let Inst{25-23} = 0b110;
3688 let Inst{22} = Qd{3};
3689 let Inst{21} = bit_21;
3690 let Inst{19-17} = Qn{2-0};
3691 let Inst{15-13} = Qd{2-0};
3692 let Inst{11-9} = 0b110;
3693 let Inst{8} = bit_8;
3694 let Inst{7} = Qn{3};
3695 let Inst{4} = bit_4;
3696 let validForTailPredication = 1;
// VFMA/VFMS: fused multiply-add into a tied accumulator. The fms variant
// (fms=1, wired to bit 21) is matched by the patterns that negate one
// multiplicand; the plain fma patterns follow them.
3699 multiclass MVE_VFMA_fp_multi<string iname, bit fms, MVEVectorVTInfo VTI> {
3700 def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size{0}, 0b1, 0b0, fms,
3701 (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
3702 defvar Inst = !cast<Instruction>(NAME);
3703 defvar pred_int = int_arm_mve_fma_predicated;
3704 defvar m1 = (VTI.Vec MQPR:$m1);
3705 defvar m2 = (VTI.Vec MQPR:$m2);
3706 defvar add = (VTI.Vec MQPR:$add);
3707 defvar pred = (VTI.Pred VCCR:$pred);
3709 let Predicates = [HasMVEFloat] in {
// Patterns with a negated multiplicand: unpredicated, vselect-predicated,
// and intrinsic-predicated (fneg accepted on either multiplicand).
3711 def : Pat<(VTI.Vec (fma (fneg m1), m2, add)),
3712 (Inst $add, $m1, $m2)>;
3713 def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
3714 (VTI.Vec (fma (fneg m1), m2, add)),
3716 (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
3717 def : Pat<(VTI.Vec (pred_int (fneg m1), m2, add, pred)),
3718 (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
3719 def : Pat<(VTI.Vec (pred_int m1, (fneg m2), add, pred)),
3720 (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
// Plain (non-negated) fma patterns.
3722 def : Pat<(VTI.Vec (fma m1, m2, add)),
3723 (Inst $add, $m1, $m2)>;
3724 def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
3725 (VTI.Vec (fma m1, m2, add)),
3727 (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
3728 def : Pat<(VTI.Vec (pred_int m1, m2, add, pred)),
3729 (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
3734 defm MVE_VFMAf32 : MVE_VFMA_fp_multi<"vfma", 0, MVE_v4f32>;
3735 defm MVE_VFMAf16 : MVE_VFMA_fp_multi<"vfma", 0, MVE_v8f16>;
3736 defm MVE_VFMSf32 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v4f32>;
3737 defm MVE_VFMSf16 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v8f16>;
// FP VADD/VSUB: same base encoding, distinguished by bit 21, with the
// shared two-operand patterns supplied by MVE_TwoOpPattern.
3739 multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
3740 SDNode Op, Intrinsic PredInt> {
3741 def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size{0}, 0, 1, bit_21> {
3742 let validForTailPredication = 1;
3744 defvar Inst = !cast<Instruction>(NAME);
3746 let Predicates = [HasMVEFloat] in {
3747 defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME)>;
3751 multiclass MVE_VADD_fp_m<MVEVectorVTInfo VTI>
3752 : MVE_VADDSUB_fp_m<"vadd", 0, VTI, fadd, int_arm_mve_add_predicated>;
3753 multiclass MVE_VSUB_fp_m<MVEVectorVTInfo VTI>
3754 : MVE_VADDSUB_fp_m<"vsub", 1, VTI, fsub, int_arm_mve_sub_predicated>;
3756 defm MVE_VADDf32 : MVE_VADD_fp_m<MVE_v4f32>;
3757 defm MVE_VADDf16 : MVE_VADD_fp_m<MVE_v8f16>;
3759 defm MVE_VSUBf32 : MVE_VSUB_fp_m<MVE_v4f32>;
3760 defm MVE_VSUBf16 : MVE_VSUB_fp_m<MVE_v8f16>;
// VCADD: complex add with a rotation operand restricted to the odd
// rotations (complexrotateopodd). The f32 instantiation below passes an
// @earlyclobber constraint through cstr.
3762 class MVE_VCADD<string suffix, bit size, string cstr="">
3763 : MVEFloatArithNeon<"vcadd", suffix, size, (outs MQPR:$Qd),
3764 (ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
3765 "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> {
3774 let Inst{22} = Qd{3};
3776 let Inst{19-17} = Qn{2-0};
3777 let Inst{15-13} = Qd{2-0};
3778 let Inst{12-8} = 0b01000;
3779 let Inst{7} = Qn{3};
// Patterns for the vcaddq intrinsics; the leading (i32 1) selects the
// halving=1 form of the generic intrinsic for this instruction.
3783 multiclass MVE_VCADD_m<MVEVectorVTInfo VTI, bit size, string cstr=""> {
3784 def "" : MVE_VCADD<VTI.Suffix, size, cstr>;
3785 defvar Inst = !cast<Instruction>(NAME);
3787 let Predicates = [HasMVEFloat] in {
3788 def : Pat<(VTI.Vec (int_arm_mve_vcaddq (i32 1),
3789 imm:$rot, (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
3790 (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
3793 def : Pat<(VTI.Vec (int_arm_mve_vcaddq_predicated (i32 1),
3794 imm:$rot, (VTI.Vec MQPR:$inactive),
3795 (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
3796 (VTI.Pred VCCR:$mask))),
3797 (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
3798 imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask),
3799 (VTI.Vec MQPR:$inactive)))>;
3804 defm MVE_VCADDf16 : MVE_VCADD_m<MVE_v8f16, 0b0>;
3805 defm MVE_VCADDf32 : MVE_VCADD_m<MVE_v4f32, 0b1, "@earlyclobber $Qd">;
// VABD (floating point): absolute difference, Qd = |Qm - Qn| lane-wise.
3807 class MVE_VABD_fp<string suffix, bit size>
3808 : MVE_float<"vabd", suffix, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
3809 "$Qd, $Qn, $Qm", vpred_r, ""> {
3814 let Inst{25-23} = 0b110;
3815 let Inst{22} = Qd{3};
3817 let Inst{20} = size;
3818 let Inst{19-17} = Qn{2-0};
3820 let Inst{15-13} = Qd{2-0};
3821 let Inst{11-8} = 0b1101;
3822 let Inst{7} = Qn{3};
3824 let validForTailPredication = 1;
// Patterns for the vabd intrinsics. The predicated intrinsic carries an
// extra (i32 0) operand which the pattern requires to be literal zero.
3827 multiclass MVE_VABDT_fp_m<MVEVectorVTInfo VTI,
3828 Intrinsic unpred_int, Intrinsic pred_int> {
3829 def "" : MVE_VABD_fp<VTI.Suffix, VTI.Size{0}>;
3830 defvar Inst = !cast<Instruction>(NAME);
3832 let Predicates = [HasMVEFloat] in {
3833 def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
3835 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
3836 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
3837 (i32 0), (VTI.Pred VCCR:$mask),
3838 (VTI.Vec MQPR:$inactive))),
3839 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
3840 ARMVCCThen, (VTI.Pred VCCR:$mask),
3841 (VTI.Vec MQPR:$inactive)))>;
3845 multiclass MVE_VABD_fp_m<MVEVectorVTInfo VTI>
3846 : MVE_VABDT_fp_m<VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>;
3848 defm MVE_VABDf32 : MVE_VABD_fp_m<MVE_v4f32>;
3849 defm MVE_VABDf16 : MVE_VABD_fp_m<MVE_v8f16>;
// Also match the generic fabs(fsub(...)) DAG form directly onto VABD.
3851 let Predicates = [HasMVEFloat] in {
3852 def : Pat<(v8f16 (fabs (fsub (v8f16 MQPR:$Qm), (v8f16 MQPR:$Qn)))),
3853 (MVE_VABDf16 MQPR:$Qm, MQPR:$Qn)>;
3854 def : Pat<(v4f32 (fabs (fsub (v4f32 MQPR:$Qm), (v4f32 MQPR:$Qn)))),
3855 (MVE_VABDf32 MQPR:$Qm, MQPR:$Qn)>;
// VCVT between floating point and fixed point with an immediate scale
// ($imm6). fsi distinguishes the f32 (5-bit imm) from the f16 (4-bit imm)
// encoding; U and op select the signedness and conversion direction.
3858 class MVE_VCVT_fix<string suffix, bit fsi, bit U, bit op,
3859 Operand imm_operand_type>
3860 : MVE_float<"vcvt", suffix,
3861 (outs MQPR:$Qd), (ins MQPR:$Qm, imm_operand_type:$imm6),
3862 "$Qd, $Qm, $imm6", vpred_r, "", []> {
3867 let Inst{25-23} = 0b111;
3868 let Inst{22} = Qd{3};
3870 let Inst{19-16} = imm6{3-0};
3871 let Inst{15-13} = Qd{2-0};
3872 let Inst{11-10} = 0b11;
3878 let DecoderMethod = "DecodeMVEVCVTt1fp";
3879 let validForTailPredication = 1;
// Assembler operand class for the fixed-point scale: immediate must lie
// in [1, Bits], with a matching diagnostic.
3882 class MVE_VCVT_imm_asmop<int Bits> : AsmOperandClass {
3883 let PredicateMethod = "isImmediate<1," # Bits # ">";
3884 let DiagnosticString =
3885 "MVE fixed-point immediate operand must be between 1 and " # Bits;
3886 let Name = "MVEVcvtImm" # Bits;
3887 let RenderMethod = "addImmOperands";
3889 class MVE_VCVT_imm<int Bits>: Operand<i32> {
3890 let ParserMatchClass = MVE_VCVT_imm_asmop<Bits>;
3891 let EncoderMethod = "getNEONVcvtImm32OpValue";
3892 let DecoderMethod = "DecodeVCVTImmOperand";
// The f32 form needs the fifth immediate bit (imm6{4}) in Inst{20}; the
// f16 form's immediate fits in the four bits encoded by the base class.
3895 class MVE_VCVT_fix_f32<string suffix, bit U, bit op>
3896 : MVE_VCVT_fix<suffix, 0b1, U, op, MVE_VCVT_imm<32>> {
3897 let Inst{20} = imm6{4};
3899 class MVE_VCVT_fix_f16<string suffix, bit U, bit op>
3900 : MVE_VCVT_fix<suffix, 0b0, U, op, MVE_VCVT_imm<16>> {
// Patterns for the vcvt_fix intrinsics, keyed on the (i32 U) signedness
// operand so each instruction only matches its own signedness.
3904 multiclass MVE_VCVT_fix_patterns<Instruction Inst, bit U, MVEVectorVTInfo DestVTI,
3905 MVEVectorVTInfo SrcVTI> {
3906 let Predicates = [HasMVEFloat] in {
3907 def : Pat<(DestVTI.Vec (int_arm_mve_vcvt_fix
3908 (i32 U), (SrcVTI.Vec MQPR:$Qm), imm:$scale)),
3909 (DestVTI.Vec (Inst (SrcVTI.Vec MQPR:$Qm), imm:$scale))>;
3910 def : Pat<(DestVTI.Vec (int_arm_mve_vcvt_fix_predicated (i32 U),
3911 (DestVTI.Vec MQPR:$inactive),
3912 (SrcVTI.Vec MQPR:$Qm),
3914 (DestVTI.Pred VCCR:$mask))),
3915 (DestVTI.Vec (Inst (SrcVTI.Vec MQPR:$Qm), imm:$scale,
3916 ARMVCCThen, (DestVTI.Pred VCCR:$mask),
3917 (DestVTI.Vec MQPR:$inactive)))>;
3921 multiclass MVE_VCVT_fix_f32_m<bit U, bit op,
3922 MVEVectorVTInfo DestVTI, MVEVectorVTInfo SrcVTI> {
3923 def "" : MVE_VCVT_fix_f32<DestVTI.Suffix#"."#SrcVTI.Suffix, U, op>;
3924 defm : MVE_VCVT_fix_patterns<!cast<Instruction>(NAME), U, DestVTI, SrcVTI>;
3927 multiclass MVE_VCVT_fix_f16_m<bit U, bit op,
3928 MVEVectorVTInfo DestVTI, MVEVectorVTInfo SrcVTI> {
3929 def "" : MVE_VCVT_fix_f16<DestVTI.Suffix#"."#SrcVTI.Suffix, U, op>;
3930 defm : MVE_VCVT_fix_patterns<!cast<Instruction>(NAME), U, DestVTI, SrcVTI>;
// All eight direction/signedness combinations for both lane widths.
3933 defm MVE_VCVTf16s16_fix : MVE_VCVT_fix_f16_m<0b0, 0b0, MVE_v8f16, MVE_v8s16>;
3934 defm MVE_VCVTs16f16_fix : MVE_VCVT_fix_f16_m<0b0, 0b1, MVE_v8s16, MVE_v8f16>;
3935 defm MVE_VCVTf16u16_fix : MVE_VCVT_fix_f16_m<0b1, 0b0, MVE_v8f16, MVE_v8u16>;
3936 defm MVE_VCVTu16f16_fix : MVE_VCVT_fix_f16_m<0b1, 0b1, MVE_v8u16, MVE_v8f16>;
3937 defm MVE_VCVTf32s32_fix : MVE_VCVT_fix_f32_m<0b0, 0b0, MVE_v4f32, MVE_v4s32>;
3938 defm MVE_VCVTs32f32_fix : MVE_VCVT_fix_f32_m<0b0, 0b1, MVE_v4s32, MVE_v4f32>;
3939 defm MVE_VCVTf32u32_fix : MVE_VCVT_fix_f32_m<0b1, 0b0, MVE_v4f32, MVE_v4u32>;
3940 defm MVE_VCVTu32f32_fix : MVE_VCVT_fix_f32_m<0b1, 0b1, MVE_v4u32, MVE_v4f32>;
// VCVT<anpm>: float-to-integer conversion with an explicit rounding-mode
// letter in the mnemonic (a/n/p/m); rm carries the mode bits.
3942 class MVE_VCVT_fp_int_anpm<string suffix, bits<2> size, bit op, string anpm,
3943 bits<2> rm, list<dag> pattern=[]>
3944 : MVE_float<!strconcat("vcvt", anpm), suffix, (outs MQPR:$Qd),
3945 (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
3949 let Inst{25-23} = 0b111;
3950 let Inst{22} = Qd{3};
3951 let Inst{21-20} = 0b11;
3952 let Inst{19-18} = size;
3953 let Inst{17-16} = 0b11;
3954 let Inst{15-13} = Qd{2-0};
3955 let Inst{12-10} = 0b000;
3959 let validForTailPredication = 1;
// One rounding mode for one int/float type pair: the instruction plus
// (un)predicated patterns for int_arm_mve_vcvt<anpm>[_predicated], keyed
// on the intrinsic's (i32 Int.Unsigned) operand.
3962 multiclass MVE_VCVT_fp_int_anpm_inner<MVEVectorVTInfo Int, MVEVectorVTInfo Flt,
3963 string anpm, bits<2> rm> {
3964 def "": MVE_VCVT_fp_int_anpm<Int.Suffix # "." # Flt.Suffix, Int.Size,
3965 Int.Unsigned, anpm, rm>;
3967 defvar Inst = !cast<Instruction>(NAME);
3968 defvar IntrBaseName = "int_arm_mve_vcvt" # anpm;
3969 defvar UnpredIntr = !cast<Intrinsic>(IntrBaseName);
3970 defvar PredIntr = !cast<Intrinsic>(IntrBaseName # "_predicated");
3972 let Predicates = [HasMVEFloat] in {
3973 def : Pat<(Int.Vec (UnpredIntr (i32 Int.Unsigned), (Flt.Vec MQPR:$in))),
3974 (Int.Vec (Inst (Flt.Vec MQPR:$in)))>;
3976 def : Pat<(Int.Vec (PredIntr (i32 Int.Unsigned), (Int.Vec MQPR:$inactive),
3977 (Flt.Vec MQPR:$in), (Flt.Pred VCCR:$pred))),
3978 (Int.Vec (Inst (Flt.Vec MQPR:$in), ARMVCCThen,
3979 (Flt.Pred VCCR:$pred), (Int.Vec MQPR:$inactive)))>;
// All four rounding modes for one int/float type pair.
3983 multiclass MVE_VCVT_fp_int_anpm_outer<MVEVectorVTInfo Int,
3984 MVEVectorVTInfo Flt> {
3985 defm a : MVE_VCVT_fp_int_anpm_inner<Int, Flt, "a", 0b00>;
3986 defm n : MVE_VCVT_fp_int_anpm_inner<Int, Flt, "n", 0b01>;
3987 defm p : MVE_VCVT_fp_int_anpm_inner<Int, Flt, "p", 0b10>;
3988 defm m : MVE_VCVT_fp_int_anpm_inner<Int, Flt, "m", 0b11>;
3991 // This defines instructions such as MVE_VCVTu16f16a, with an explicit
3992 // rounding-mode suffix on the mnemonic. The class below will define
3993 // the bare MVE_VCVTu16f16 (with implied rounding toward zero).
3994 defm MVE_VCVTs16f16 : MVE_VCVT_fp_int_anpm_outer<MVE_v8s16, MVE_v8f16>;
3995 defm MVE_VCVTu16f16 : MVE_VCVT_fp_int_anpm_outer<MVE_v8u16, MVE_v8f16>;
3996 defm MVE_VCVTs32f32 : MVE_VCVT_fp_int_anpm_outer<MVE_v4s32, MVE_v4f32>;
3997 defm MVE_VCVTu32f32 : MVE_VCVT_fp_int_anpm_outer<MVE_v4u32, MVE_v4f32>;
// Plain VCVT between integer and floating point (no mnemonic suffix);
// toint and unsigned select the direction and signedness in bits 8 and 7.
3999 class MVE_VCVT_fp_int<string suffix, bits<2> size, bit toint, bit unsigned,
4000 list<dag> pattern=[]>
4001 : MVE_float<"vcvt", suffix, (outs MQPR:$Qd),
4002 (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
4006 let Inst{25-23} = 0b111;
4007 let Inst{22} = Qd{3};
4008 let Inst{21-20} = 0b11;
4009 let Inst{19-18} = size;
4010 let Inst{17-16} = 0b11;
4011 let Inst{15-13} = Qd{2-0};
4012 let Inst{12-9} = 0b0011;
4013 let Inst{8} = toint;
4014 let Inst{7} = unsigned;
4016 let validForTailPredication = 1;
// Signedness and direction are derived from the suffix letters of the
// two MVEVectorVTInfo arguments rather than passed explicitly.
4019 multiclass MVE_VCVT_fp_int_m<MVEVectorVTInfo Dest, MVEVectorVTInfo Src,
4021 defvar Unsigned = !or(!eq(Dest.SuffixLetter,"u"), !eq(Src.SuffixLetter,"u"));
4022 defvar ToInt = !eq(Src.SuffixLetter,"f");
4024 def "" : MVE_VCVT_fp_int<Dest.Suffix # "." # Src.Suffix, Dest.Size,
4026 defvar Inst = !cast<Instruction>(NAME);
4028 let Predicates = [HasMVEFloat] in {
4029 def : Pat<(Dest.Vec (unpred_op (Src.Vec MQPR:$src))),
4030 (Dest.Vec (Inst (Src.Vec MQPR:$src)))>;
4031 def : Pat<(Dest.Vec (int_arm_mve_vcvt_fp_int_predicated
4032 (Src.Vec MQPR:$src), (i32 Unsigned),
4033 (Src.Pred VCCR:$mask), (Dest.Vec MQPR:$inactive))),
4034 (Dest.Vec (Inst (Src.Vec MQPR:$src), ARMVCCThen,
4035 (Src.Pred VCCR:$mask),
4036 (Dest.Vec MQPR:$inactive)))>;
4039 // The unsuffixed VCVT for float->int implicitly rounds toward zero,
4040 // which I reflect here in the llvm instruction names
4041 defm MVE_VCVTs16f16z : MVE_VCVT_fp_int_m<MVE_v8s16, MVE_v8f16, fp_to_sint>;
4042 defm MVE_VCVTu16f16z : MVE_VCVT_fp_int_m<MVE_v8u16, MVE_v8f16, fp_to_uint>;
4043 defm MVE_VCVTs32f32z : MVE_VCVT_fp_int_m<MVE_v4s32, MVE_v4f32, fp_to_sint>;
4044 defm MVE_VCVTu32f32z : MVE_VCVT_fp_int_m<MVE_v4u32, MVE_v4f32, fp_to_uint>;
4045 // Whereas VCVT for int->float rounds to nearest
4046 defm MVE_VCVTf16s16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8s16, sint_to_fp>;
4047 defm MVE_VCVTf16u16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8u16, uint_to_fp>;
4048 defm MVE_VCVTf32s32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4s32, sint_to_fp>;
4049 defm MVE_VCVTf32u32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4u32, uint_to_fp>;
// VABS/VNEG (floating point): single-source unary op, distinguished only
// by the negate bit (Inst{7}).
4051 class MVE_VABSNEG_fp<string iname, string suffix, bits<2> size, bit negate,
4052 list<dag> pattern=[]>
4053 : MVE_float<iname, suffix, (outs MQPR:$Qd),
4054 (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
4058 let Inst{25-23} = 0b111;
4059 let Inst{22} = Qd{3};
4060 let Inst{21-20} = 0b11;
4061 let Inst{19-18} = size;
4062 let Inst{17-16} = 0b01;
4063 let Inst{15-13} = Qd{2-0};
4064 let Inst{11-8} = 0b0111;
4065 let Inst{7} = negate;
4067 let validForTailPredication = 1;
// Instruction plus unpredicated (generic fabs/fneg) and predicated
// (target intrinsic) selection patterns.
4070 multiclass MVE_VABSNEG_fp_m<string iname, SDNode unpred_op, Intrinsic pred_int,
4071 MVEVectorVTInfo VTI, bit opcode> {
4072 def "" : MVE_VABSNEG_fp<iname, VTI.Suffix, VTI.Size, opcode>;
4073 defvar Inst = !cast<Instruction>(NAME);
4075 let Predicates = [HasMVEInt] in {
4076 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))),
4077 (VTI.Vec (Inst $v))>;
4078 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask),
4079 (VTI.Vec MQPR:$inactive))),
4080 (VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>;
// NOTE(review): the MVEVectorVTInfo argument of each defm below sits on a
// line missing from this dump (presumably MVE_v8f16/MVE_v4f32); confirm
// against the full file.
4084 defm MVE_VABSf16 : MVE_VABSNEG_fp_m<"vabs", fabs, int_arm_mve_abs_predicated,
4086 defm MVE_VABSf32 : MVE_VABSNEG_fp_m<"vabs", fabs, int_arm_mve_abs_predicated,
4088 defm MVE_VNEGf16 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated,
4090 defm MVE_VNEGf32 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated,
// VMAXNMA/VMINNMA: max/min of absolute values, accumulating into a tied
// $Qd (vpred_n, "$Qd = $Qd_src"); bit 12 distinguishes max from min.
4093 class MVE_VMAXMINNMA<string iname, string suffix, bit size, bit bit_12,
4094 list<dag> pattern=[]>
4095 : MVE_f<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
4096 NoItinerary, iname, suffix, "$Qd, $Qm", vpred_n, "$Qd = $Qd_src",
4101 let Inst{28} = size;
4102 let Inst{25-23} = 0b100;
4103 let Inst{22} = Qd{3};
4104 let Inst{21-16} = 0b111111;
4105 let Inst{15-13} = Qd{2-0};
4106 let Inst{12} = bit_12;
4107 let Inst{11-6} = 0b111010;
4108 let Inst{5} = Qm{3};
4110 let Inst{3-1} = Qm{2-0};
4113 let isCommutable = 1;
// Unpredicated pattern matches the generic node applied to fabs of both
// operands; predicated pattern matches the target intrinsic directly.
4116 multiclass MVE_VMAXMINNMA_m<string iname, MVEVectorVTInfo VTI,
4117 SDNode unpred_op, Intrinsic pred_int,
4119 def "" : MVE_VMAXMINNMA<iname, VTI.Suffix, VTI.Size{0}, bit_12>;
4120 defvar Inst = !cast<Instruction>(NAME);
4122 let Predicates = [HasMVEInt] in {
4123 // Unpredicated v(max|min)nma
4124 def : Pat<(VTI.Vec (unpred_op (fabs (VTI.Vec MQPR:$Qd)),
4125 (fabs (VTI.Vec MQPR:$Qm)))),
4126 (VTI.Vec (Inst (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm)))>;
4128 // Predicated v(max|min)nma
4129 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm),
4130 (VTI.Pred VCCR:$mask))),
4131 (VTI.Vec (Inst (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm),
4132 ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
4136 multiclass MVE_VMAXNMA<MVEVectorVTInfo VTI, bit bit_12>
4137 : MVE_VMAXMINNMA_m<"vmaxnma", VTI, fmaxnum, int_arm_mve_vmaxnma_predicated, bit_12>;
4139 defm MVE_VMAXNMAf32 : MVE_VMAXNMA<MVE_v4f32, 0b0>;
4140 defm MVE_VMAXNMAf16 : MVE_VMAXNMA<MVE_v8f16, 0b0>;
4142 multiclass MVE_VMINNMA<MVEVectorVTInfo VTI, bit bit_12>
4143 : MVE_VMAXMINNMA_m<"vminnma", VTI, fminnum, int_arm_mve_vminnma_predicated, bit_12>;
4145 defm MVE_VMINNMAf32 : MVE_VMINNMA<MVE_v4f32, 0b1>;
4146 defm MVE_VMINNMAf16 : MVE_VMINNMA<MVE_v8f16, 0b1>;
4148 // end of MVE Floating Point instructions
4150 // start of MVE compares
// VCMP (vector vs. vector): writes the per-lane comparison result into
// the predicate register P0 (modelled as the VCCR output). The condition
// code $fc is scattered across Inst{12}, Inst{7} and Inst{0}.
4152 class MVE_VCMPqq<string suffix, bit bit_28, bits<2> bits_21_20,
4153 VCMPPredicateOperand predtype, list<dag> pattern=[]>
4154 : MVE_p<(outs VCCR:$P0), (ins MQPR:$Qn, MQPR:$Qm, predtype:$fc),
4155 NoItinerary, "vcmp", suffix, "$fc, $Qn, $Qm", vpred_n, "", pattern> {
4156 // Base class for comparing two vector registers
4161 let Inst{28} = bit_28;
4162 let Inst{25-22} = 0b1000;
4163 let Inst{21-20} = bits_21_20;
4164 let Inst{19-17} = Qn{2-0};
4165 let Inst{16-13} = 0b1000;
4166 let Inst{12} = fc{2};
4167 let Inst{11-8} = 0b1111;
4168 let Inst{7} = fc{0};
4170 let Inst{5} = Qm{3};
4172 let Inst{3-1} = Qm{2-0};
4173 let Inst{0} = fc{1};
4175 let Constraints = "";
4177 // We need a custom decoder method for these instructions because of
4178 // the output VCCR operand, which isn't encoded in the instruction
4179 // bits anywhere (there is only one choice for it) but has to be
4180 // included in the MC operands so that codegen will be able to track
4181 // its data flow between instructions, spill/reload it when
4182 // necessary, etc. There seems to be no way to get the Tablegen
4183 // decoder to emit an operand that isn't affected by any instruction
4185 let DecoderMethod = "DecodeMVEVCMP<false," # predtype.DecoderMethod # ">";
4186 let validForTailPredication = 1;
// Type-specific subclasses: each picks the asm-operand class that
// restricts the legal condition codes for that element type.
4189 class MVE_VCMPqqf<string suffix, bit size>
4190 : MVE_VCMPqq<suffix, size, 0b11, pred_basic_fp> {
4191 let Predicates = [HasMVEFloat];
4194 class MVE_VCMPqqi<string suffix, bits<2> size>
4195 : MVE_VCMPqq<suffix, 0b1, size, pred_basic_i> {
4200 class MVE_VCMPqqu<string suffix, bits<2> size>
4201 : MVE_VCMPqq<suffix, 0b1, size, pred_basic_u> {
4206 class MVE_VCMPqqs<string suffix, bits<2> size>
4207 : MVE_VCMPqq<suffix, 0b1, size, pred_basic_s> {
4211 def MVE_VCMPf32 : MVE_VCMPqqf<"f32", 0b0>;
4212 def MVE_VCMPf16 : MVE_VCMPqqf<"f16", 0b1>;
4214 def MVE_VCMPi8 : MVE_VCMPqqi<"i8", 0b00>;
4215 def MVE_VCMPi16 : MVE_VCMPqqi<"i16", 0b01>;
4216 def MVE_VCMPi32 : MVE_VCMPqqi<"i32", 0b10>;
4218 def MVE_VCMPu8 : MVE_VCMPqqu<"u8", 0b00>;
4219 def MVE_VCMPu16 : MVE_VCMPqqu<"u16", 0b01>;
4220 def MVE_VCMPu32 : MVE_VCMPqqu<"u32", 0b10>;
4222 def MVE_VCMPs8 : MVE_VCMPqqs<"s8", 0b00>;
4223 def MVE_VCMPs16 : MVE_VCMPqqs<"s16", 0b01>;
4224 def MVE_VCMPs32 : MVE_VCMPqqs<"s32", 0b10>;
// VCMP (vector vs. scalar): same structure as MVE_VCMPqq but the second
// operand is a GPR (GPRwithZR, so ZR encodes compare-against-zero), with
// fc{1} moving to Inst{5} and Rm occupying Inst{3-0}.
4226 class MVE_VCMPqr<string suffix, bit bit_28, bits<2> bits_21_20,
4227 VCMPPredicateOperand predtype, list<dag> pattern=[]>
4228 : MVE_p<(outs VCCR:$P0), (ins MQPR:$Qn, GPRwithZR:$Rm, predtype:$fc),
4229 NoItinerary, "vcmp", suffix, "$fc, $Qn, $Rm", vpred_n, "", pattern> {
4230 // Base class for comparing a vector register with a scalar
4235 let Inst{28} = bit_28;
4236 let Inst{25-22} = 0b1000;
4237 let Inst{21-20} = bits_21_20;
4238 let Inst{19-17} = Qn{2-0};
4239 let Inst{16-13} = 0b1000;
4240 let Inst{12} = fc{2};
4241 let Inst{11-8} = 0b1111;
4242 let Inst{7} = fc{0};
4244 let Inst{5} = fc{1};
4246 let Inst{3-0} = Rm{3-0};
4248 let Constraints = "";
4249 // Custom decoder method, for the same reason as MVE_VCMPqq
4250 let DecoderMethod = "DecodeMVEVCMP<true," # predtype.DecoderMethod # ">";
4251 let validForTailPredication = 1;
4254 class MVE_VCMPqrf<string suffix, bit size>
4255 : MVE_VCMPqr<suffix, size, 0b11, pred_basic_fp> {
4256 let Predicates = [HasMVEFloat];
4259 class MVE_VCMPqri<string suffix, bits<2> size>
4260 : MVE_VCMPqr<suffix, 0b1, size, pred_basic_i> {
4265 class MVE_VCMPqru<string suffix, bits<2> size>
4266 : MVE_VCMPqr<suffix, 0b1, size, pred_basic_u> {
4271 class MVE_VCMPqrs<string suffix, bits<2> size>
4272 : MVE_VCMPqr<suffix, 0b1, size, pred_basic_s> {
4276 def MVE_VCMPf32r : MVE_VCMPqrf<"f32", 0b0>;
4277 def MVE_VCMPf16r : MVE_VCMPqrf<"f16", 0b1>;
4279 def MVE_VCMPi8r : MVE_VCMPqri<"i8", 0b00>;
4280 def MVE_VCMPi16r : MVE_VCMPqri<"i16", 0b01>;
4281 def MVE_VCMPi32r : MVE_VCMPqri<"i32", 0b10>;
4283 def MVE_VCMPu8r : MVE_VCMPqru<"u8", 0b00>;
4284 def MVE_VCMPu16r : MVE_VCMPqru<"u16", 0b01>;
4285 def MVE_VCMPu32r : MVE_VCMPqru<"u32", 0b10>;
4287 def MVE_VCMPs8r : MVE_VCMPqrs<"s8", 0b00>;
4288 def MVE_VCMPs16r : MVE_VCMPqrs<"s16", 0b01>;
4289 def MVE_VCMPs32r : MVE_VCMPqrs<"s32", 0b10>;
// Integer compare-against-zero patterns: ARMvcmpz maps to the register
// form of VCMP with ZR as the scalar operand. The later Pats fold an
// `and` with an existing predicate into a VPT-predicated compare.
4291 multiclass unpred_vcmp_z<string suffix, PatLeaf fc> {
4292 def i8 : Pat<(v16i1 (ARMvcmpz (v16i8 MQPR:$v1), fc)),
4293 (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc))>;
4294 def i16 : Pat<(v8i1 (ARMvcmpz (v8i16 MQPR:$v1), fc)),
4295 (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc))>;
4296 def i32 : Pat<(v4i1 (ARMvcmpz (v4i32 MQPR:$v1), fc)),
4297 (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc))>;
4299 def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmpz (v16i8 MQPR:$v1), fc)))),
4300 (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
4301 def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8i16 MQPR:$v1), fc)))),
4302 (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
4303 def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4i32 MQPR:$v1), fc)))),
4304 (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
// Integer vector-vs-vector compares, plus vector-vs-duplicated-scalar
// forms (the `r` Pats fold an ARMvdup of a GPR into the scalar operand),
// each also with a predicate-and folded variant.
4307 multiclass unpred_vcmp_r<string suffix, PatLeaf fc> {
4308 def i8 : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc)),
4309 (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc))>;
4310 def i16 : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc)),
4311 (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc))>;
4312 def i32 : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc)),
4313 (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>;
4315 def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup rGPR:$v2)), fc)),
4316 (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 rGPR:$v2), fc))>;
4317 def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup rGPR:$v2)), fc)),
4318 (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 rGPR:$v2), fc))>;
4319 def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup rGPR:$v2)), fc)),
4320 (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 rGPR:$v2), fc))>;
4322 def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc)))),
4323 (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
4324 def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc)))),
4325 (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
4326 def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc)))),
4327 (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
4329 def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup rGPR:$v2)), fc)))),
4330 (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
4331 def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup rGPR:$v2)), fc)))),
4332 (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
4333 def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup rGPR:$v2)), fc)))),
4334 (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
// Floating-point analogues of the two multiclasses above; only f16/f32
// exist so the instructions are referenced directly rather than by !cast.
4337 multiclass unpred_vcmpf_z<PatLeaf fc> {
4338 def f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), fc)),
4339 (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc))>;
4340 def f32 : Pat<(v4i1 (ARMvcmpz (v4f32 MQPR:$v1), fc)),
4341 (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc))>;
4343 def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), fc)))),
4344 (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
4345 def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), fc)))),
4346 (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
4349 multiclass unpred_vcmpf_r<PatLeaf fc> {
4350 def : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)),
4351 (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>;
4352 def : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)),
4353 (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>;
4355 def : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup rGPR:$v2)), fc)),
4356 (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 rGPR:$v2), fc))>;
4357 def : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup rGPR:$v2)), fc)),
4358 (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 rGPR:$v2), fc))>;
4360 def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)))),
4361 (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
4362 def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)))),
4363 (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
4365 def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup rGPR:$v2)), fc)))),
4366 (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
4367 def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup rGPR:$v2)), fc)))),
4368 (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
// Instantiate the compare-pattern multiclasses once per condition code.
// Integer compares: sign-independent conditions use the "i" suffix, signed
// use "s", unsigned use "u" (only HI and HS exist for unsigned).
4371 let Predicates = [HasMVEInt] in {
// Compare-against-zero forms.
4372 defm MVE_VCEQZ : unpred_vcmp_z<"i", ARMCCeq>;
4373 defm MVE_VCNEZ : unpred_vcmp_z<"i", ARMCCne>;
4374 defm MVE_VCGEZ : unpred_vcmp_z<"s", ARMCCge>;
4375 defm MVE_VCLTZ : unpred_vcmp_z<"s", ARMCClt>;
4376 defm MVE_VCGTZ : unpred_vcmp_z<"s", ARMCCgt>;
4377 defm MVE_VCLEZ : unpred_vcmp_z<"s", ARMCCle>;
4378 defm MVE_VCGTUZ : unpred_vcmp_z<"u", ARMCChi>;
4379 defm MVE_VCGEUZ : unpred_vcmp_z<"u", ARMCChs>;
// Two-register forms.
4381 defm MVE_VCEQ : unpred_vcmp_r<"i", ARMCCeq>;
4382 defm MVE_VCNE : unpred_vcmp_r<"i", ARMCCne>;
4383 defm MVE_VCGE : unpred_vcmp_r<"s", ARMCCge>;
4384 defm MVE_VCLT : unpred_vcmp_r<"s", ARMCClt>;
4385 defm MVE_VCGT : unpred_vcmp_r<"s", ARMCCgt>;
4386 defm MVE_VCLE : unpred_vcmp_r<"s", ARMCCle>;
4387 defm MVE_VCGTU : unpred_vcmp_r<"u", ARMCChi>;
4388 defm MVE_VCGEU : unpred_vcmp_r<"u", ARMCChs>;
// Floating-point compares, guarded on the MVE-with-FP feature.
4391 let Predicates = [HasMVEFloat] in {
4392 defm MVE_VFCEQZ : unpred_vcmpf_z<ARMCCeq>;
4393 defm MVE_VFCNEZ : unpred_vcmpf_z<ARMCCne>;
4394 defm MVE_VFCGEZ : unpred_vcmpf_z<ARMCCge>;
4395 defm MVE_VFCLTZ : unpred_vcmpf_z<ARMCClt>;
4396 defm MVE_VFCGTZ : unpred_vcmpf_z<ARMCCgt>;
4397 defm MVE_VFCLEZ : unpred_vcmpf_z<ARMCCle>;
4399 defm MVE_VFCEQ : unpred_vcmpf_r<ARMCCeq>;
4400 defm MVE_VFCNE : unpred_vcmpf_r<ARMCCne>;
4401 defm MVE_VFCGE : unpred_vcmpf_r<ARMCCge>;
4402 defm MVE_VFCLT : unpred_vcmpf_r<ARMCClt>;
4403 defm MVE_VFCGT : unpred_vcmpf_r<ARMCCgt>;
4404 defm MVE_VFCLE : unpred_vcmpf_r<ARMCCle>;
4408 // Extra "worst case" and/or/xor patterns, going into and out of GPR
// two_predops: fallback patterns for a bitwise operation on two predicate
// vectors. Each predicate is copied out of VCCR into an rGPR, combined with
// a Thumb2 scalar bitwise instruction (`insn`), and the result is copied
// back into VCCR. One pattern per predicate vector type.
4409 multiclass two_predops<SDPatternOperator opnode, Instruction insn> {
4410 def v16i1 : Pat<(v16i1 (opnode (v16i1 VCCR:$p1), (v16i1 VCCR:$p2))),
4411 (v16i1 (COPY_TO_REGCLASS
4412 (insn (i32 (COPY_TO_REGCLASS (v16i1 VCCR:$p1), rGPR)),
4413 (i32 (COPY_TO_REGCLASS (v16i1 VCCR:$p2), rGPR))),
4415 def v8i1 : Pat<(v8i1 (opnode (v8i1 VCCR:$p1), (v8i1 VCCR:$p2))),
4416 (v8i1 (COPY_TO_REGCLASS
4417 (insn (i32 (COPY_TO_REGCLASS (v8i1 VCCR:$p1), rGPR)),
4418 (i32 (COPY_TO_REGCLASS (v8i1 VCCR:$p2), rGPR))),
4420 def v4i1 : Pat<(v4i1 (opnode (v4i1 VCCR:$p1), (v4i1 VCCR:$p2))),
4421 (v4i1 (COPY_TO_REGCLASS
4422 (insn (i32 (COPY_TO_REGCLASS (v4i1 VCCR:$p1), rGPR)),
4423 (i32 (COPY_TO_REGCLASS (v4i1 VCCR:$p2), rGPR))),
// Instantiate for or/and/xor using the corresponding Thumb2 scalar ops.
4427 let Predicates = [HasMVEInt] in {
4428 defm POR : two_predops<or, t2ORRrr>;
4429 defm PAND : two_predops<and, t2ANDrr>;
4430 defm PEOR : two_predops<xor, t2EORrr>;
4433 // Occasionally we need to cast between a i32 and a boolean vector, for
4434 // example when moving between rGPR and VPR.P0 as part of predicate vector
4435 // shuffles. We also sometimes need to cast between different predicate
4436 // vector types (v4i1<>v8i1, etc.) also as part of lowering vector shuffles.
4437 def predicate_cast : SDNode<"ARMISD::PREDICATE_CAST", SDTUnaryOp>;
// Matches only loads whose alignment is at least 4 bytes, as required for
// loading a predicate register directly from memory.
4439 def load_align4 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
4440 return cast<LoadSDNode>(N)->getAlignment() >= 4;
4443 let Predicates = [HasMVEInt] in {
// All predicate_cast combinations (i32 <-> VT, and VT <-> VT2) become a
// plain register-class copy within VCCR.
4444 foreach VT = [ v4i1, v8i1, v16i1 ] in {
4445 def : Pat<(i32 (predicate_cast (VT VCCR:$src))),
4446 (i32 (COPY_TO_REGCLASS (VT VCCR:$src), VCCR))>;
4447 def : Pat<(VT (predicate_cast (i32 VCCR:$src))),
4448 (VT (COPY_TO_REGCLASS (i32 VCCR:$src), VCCR))>;
4450 foreach VT2 = [ v4i1, v8i1, v16i1 ] in
4451 def : Pat<(VT (predicate_cast (VT2 VCCR:$src))),
4452 (VT (COPY_TO_REGCLASS (VT2 VCCR:$src), VCCR))>;
4455 // If we happen to be casting from a load we can convert that straight
4456 // into a predicate load, so long as the load is of the correct type.
4457 foreach VT = [ v4i1, v8i1, v16i1 ] in {
4458 def : Pat<(VT (predicate_cast (i32 (load_align4 taddrmode_imm7<2>:$addr)))),
4459 (VT (VLDR_P0_off taddrmode_imm7<2>:$addr))>;
4462 // Here we match the specific SDNode type 'ARMVectorRegCastImpl'
4463 // rather than the more general 'ARMVectorRegCast' which would also
4464 // match some bitconverts. If we use the latter in cases where the
4465 // input and output types are the same, the bitconvert gets elided
4466 // and we end up generating a nonsense match of nothing.
// Register casts between any pair of 128-bit MQPR vector types.
4468 foreach VT = [ v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64 ] in
4469 foreach VT2 = [ v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64 ] in
4470 def : Pat<(VT (ARMVectorRegCastImpl (VT2 MQPR:$src))),
4474 // end of MVE compares
4476 // start of MVE_qDest_qSrc
// MVE_qDest_qSrc: common base class for MVE instructions with a vector
// (Q-register) destination Qd and a vector source Qm. Sets the encoding
// bits shared by this instruction group and places the split Qd/Qm
// register fields; subclasses fill in the remaining opcode bits.
4478 class MVE_qDest_qSrc<string iname, string suffix, dag oops, dag iops,
4479 string ops, vpred_ops vpred, string cstr,
4480 list<dag> pattern=[]>
4481 : MVE_p<oops, iops, NoItinerary, iname, suffix,
4482 ops, vpred, cstr, pattern> {
4486 let Inst{25-23} = 0b100;
4487 let Inst{22} = Qd{3};
4488 let Inst{15-13} = Qd{2-0};
4489 let Inst{11-9} = 0b111;
4491 let Inst{5} = Qm{3};
4493 let Inst{3-1} = Qm{2-0};
// MVE_VQxDMLxDH: the VQ(R)DML{A,S}DH(X) family of saturating doubling
// multiply-add/subtract instructions. The three bits select the variant:
// `exch` (the X forms), `round` (the R forms) and `subtract` (DLS vs DLA).
// Accumulates into $Qd ($Qd = $Qd_src tie).
4496 class MVE_VQxDMLxDH<string iname, bit exch, bit round, bit subtract,
4497 string suffix, bits<2> size, string cstr="", list<dag> pattern=[]>
4498 : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
4499 (ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
4500 vpred_n, "$Qd = $Qd_src"#cstr, pattern> {
4503 let Inst{28} = subtract;
4504 let Inst{21-20} = size;
4505 let Inst{19-17} = Qn{2-0};
4507 let Inst{12} = exch;
4509 let Inst{7} = Qn{3};
4510 let Inst{0} = round;
// MVE_VQxDMLxDH_p: one instruction plus its intrinsic selection patterns.
// The variant bits are appended to the intrinsic call via !con, so a single
// pair of patterns serves all eight variants. 32-bit lanes need
// @earlyclobber on $Qd.
4513 multiclass MVE_VQxDMLxDH_p<string iname, bit exch, bit round, bit subtract,
4514 MVEVectorVTInfo VTI> {
4515 def "": MVE_VQxDMLxDH<iname, exch, round, subtract, VTI.Suffix, VTI.Size,
4516 !if(!eq(VTI.LaneBits, 32), ",@earlyclobber $Qd", "")>;
4517 defvar Inst = !cast<Instruction>(NAME);
4518 defvar ConstParams = (? (i32 exch), (i32 round), (i32 subtract));
4519 defvar unpred_intr = int_arm_mve_vqdmlad;
4520 defvar pred_intr = int_arm_mve_vqdmlad_predicated;
4522 def : Pat<(VTI.Vec !con((unpred_intr (VTI.Vec MQPR:$a), (VTI.Vec MQPR:$b),
4523 (VTI.Vec MQPR:$c)), ConstParams)),
4524 (VTI.Vec (Inst (VTI.Vec MQPR:$a), (VTI.Vec MQPR:$b),
4525 (VTI.Vec MQPR:$c)))>;
4526 def : Pat<(VTI.Vec !con((pred_intr (VTI.Vec MQPR:$a), (VTI.Vec MQPR:$b),
4527 (VTI.Vec MQPR:$c)), ConstParams,
4528 (? (VTI.Pred VCCR:$pred)))),
4529 (VTI.Vec (Inst (VTI.Vec MQPR:$a), (VTI.Vec MQPR:$b),
4531 ARMVCCThen, (VTI.Pred VCCR:$pred)))>;
// Instantiate one variant for each signed element size.
4534 multiclass MVE_VQxDMLxDH_multi<string iname, bit exch,
4535 bit round, bit subtract> {
4536 defm s8 : MVE_VQxDMLxDH_p<iname, exch, round, subtract, MVE_v16s8>;
4537 defm s16 : MVE_VQxDMLxDH_p<iname, exch, round, subtract, MVE_v8s16>;
4538 defm s32 : MVE_VQxDMLxDH_p<iname, exch, round, subtract, MVE_v4s32>;
// All eight combinations of exch/round/subtract.
4541 defm MVE_VQDMLADH : MVE_VQxDMLxDH_multi<"vqdmladh", 0b0, 0b0, 0b0>;
4542 defm MVE_VQDMLADHX : MVE_VQxDMLxDH_multi<"vqdmladhx", 0b1, 0b0, 0b0>;
4543 defm MVE_VQRDMLADH : MVE_VQxDMLxDH_multi<"vqrdmladh", 0b0, 0b1, 0b0>;
4544 defm MVE_VQRDMLADHX : MVE_VQxDMLxDH_multi<"vqrdmladhx", 0b1, 0b1, 0b0>;
4545 defm MVE_VQDMLSDH : MVE_VQxDMLxDH_multi<"vqdmlsdh", 0b0, 0b0, 0b1>;
4546 defm MVE_VQDMLSDHX : MVE_VQxDMLxDH_multi<"vqdmlsdhx", 0b1, 0b0, 0b1>;
4547 defm MVE_VQRDMLSDH : MVE_VQxDMLxDH_multi<"vqrdmlsdh", 0b0, 0b1, 0b1>;
4548 defm MVE_VQRDMLSDHX : MVE_VQxDMLxDH_multi<"vqrdmlsdhx", 0b1, 0b1, 0b1>;
4550 class MVE_VCMUL<string iname, string suffix, bit size, string cstr="">
4551 : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
4552 (ins MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
4553 "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> {
4557 let Inst{28} = size;
4558 let Inst{21-20} = 0b11;
4559 let Inst{19-17} = Qn{2-0};
4561 let Inst{12} = rot{1};
4563 let Inst{7} = Qn{3};
4564 let Inst{0} = rot{0};
4566 let Predicates = [HasMVEFloat];
4569 multiclass MVE_VCMUL_m<string iname, MVEVectorVTInfo VTI,
4570 bit size, string cstr=""> {
4571 def "" : MVE_VCMUL<iname, VTI.Suffix, size, cstr>;
4572 defvar Inst = !cast<Instruction>(NAME);
4574 let Predicates = [HasMVEFloat] in {
4575 def : Pat<(VTI.Vec (int_arm_mve_vcmulq
4576 imm:$rot, (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
4577 (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
4580 def : Pat<(VTI.Vec (int_arm_mve_vcmulq_predicated
4581 imm:$rot, (VTI.Vec MQPR:$inactive),
4582 (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
4583 (VTI.Pred VCCR:$mask))),
4584 (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
4585 imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask),
4586 (VTI.Vec MQPR:$inactive)))>;
4591 defm MVE_VCMULf16 : MVE_VCMUL_m<"vcmul", MVE_v8f16, 0b0>;
4592 defm MVE_VCMULf32 : MVE_VCMUL_m<"vcmul", MVE_v4f32, 0b1, "@earlyclobber $Qd">;
// MVE_VMULL: widening vector multiply producing a double-width result
// (doubleWidthResult = 1). `T` selects the top-half ("vmullt") vs
// bottom-half ("vmullb") lanes of the inputs.
4594 class MVE_VMULL<string iname, string suffix, bit bit_28, bits<2> bits_21_20,
4595 bit T, string cstr, list<dag> pattern=[]>
4596 : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
4597 (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
4598 vpred_r, cstr, pattern> {
4603 let Inst{28} = bit_28;
4604 let Inst{21-20} = bits_21_20;
4605 let Inst{19-17} = Qn{2-0};
4609 let Inst{7} = Qn{3};
4611 let validForTailPredication = 1;
4612 let doubleWidthResult = 1;
// MVE_VMULL_m: instruction plus intrinsic patterns. For polynomial types
// (suffix letter "p") the intrinsic takes no unsigned flag, so `uflag` is
// conditionally empty; otherwise VTI.Unsigned is appended to the call.
4615 multiclass MVE_VMULL_m<MVEVectorVTInfo VTI,
4616 SDPatternOperator unpred_op, Intrinsic pred_int,
4617 bit Top, string cstr=""> {
4618 def "" : MVE_VMULL<"vmull" # !if(Top, "t", "b"), VTI.Suffix, VTI.Unsigned,
4619 VTI.Size, Top, cstr>;
4620 defvar Inst = !cast<Instruction>(NAME);
4622 let Predicates = [HasMVEInt] in {
4623 defvar uflag = !if(!eq(VTI.SuffixLetter, "p"), (?), (? (i32 VTI.Unsigned)));
4625 // Unpredicated multiply
4626 def : Pat<(VTI.DblVec !con((unpred_op (VTI.Vec MQPR:$Qm),
4627 (VTI.Vec MQPR:$Qn)),
4628 uflag, (? (i32 Top)))),
4629 (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
4631 // Predicated multiply
4632 def : Pat<(VTI.DblVec !con((pred_int (VTI.Vec MQPR:$Qm),
4633 (VTI.Vec MQPR:$Qn)),
4634 uflag, (? (i32 Top), (VTI.DblPred VCCR:$mask),
4635 (VTI.DblVec MQPR:$inactive)))),
4636 (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
4637 ARMVCCThen, (VTI.DblPred VCCR:$mask),
4638 (VTI.DblVec MQPR:$inactive)))>;
4642 // For polynomial multiplies, the size bits take the unused value 0b11, and
4643 // the unsigned bit switches to encoding the size.
// Signed widening multiplies; 32-bit sources produce a 64-bit-lane result
// and need @earlyclobber on the destination.
4645 defm MVE_VMULLBs8 : MVE_VMULL_m<MVE_v16s8, int_arm_mve_vmull,
4646 int_arm_mve_mull_int_predicated, 0b0>;
4647 defm MVE_VMULLTs8 : MVE_VMULL_m<MVE_v16s8, int_arm_mve_vmull,
4648 int_arm_mve_mull_int_predicated, 0b1>;
4649 defm MVE_VMULLBs16 : MVE_VMULL_m<MVE_v8s16, int_arm_mve_vmull,
4650 int_arm_mve_mull_int_predicated, 0b0>;
4651 defm MVE_VMULLTs16 : MVE_VMULL_m<MVE_v8s16, int_arm_mve_vmull,
4652 int_arm_mve_mull_int_predicated, 0b1>;
4653 defm MVE_VMULLBs32 : MVE_VMULL_m<MVE_v4s32, int_arm_mve_vmull,
4654 int_arm_mve_mull_int_predicated, 0b0,
4655 "@earlyclobber $Qd">;
4656 defm MVE_VMULLTs32 : MVE_VMULL_m<MVE_v4s32, int_arm_mve_vmull,
4657 int_arm_mve_mull_int_predicated, 0b1,
4658 "@earlyclobber $Qd">;
// Unsigned widening multiplies.
4660 defm MVE_VMULLBu8 : MVE_VMULL_m<MVE_v16u8, int_arm_mve_vmull,
4661 int_arm_mve_mull_int_predicated, 0b0>;
4662 defm MVE_VMULLTu8 : MVE_VMULL_m<MVE_v16u8, int_arm_mve_vmull,
4663 int_arm_mve_mull_int_predicated, 0b1>;
4664 defm MVE_VMULLBu16 : MVE_VMULL_m<MVE_v8u16, int_arm_mve_vmull,
4665 int_arm_mve_mull_int_predicated, 0b0>;
4666 defm MVE_VMULLTu16 : MVE_VMULL_m<MVE_v8u16, int_arm_mve_vmull,
4667 int_arm_mve_mull_int_predicated, 0b1>;
4668 defm MVE_VMULLBu32 : MVE_VMULL_m<MVE_v4u32, int_arm_mve_vmull,
4669 int_arm_mve_mull_int_predicated, 0b0,
4670 "@earlyclobber $Qd">;
4671 defm MVE_VMULLTu32 : MVE_VMULL_m<MVE_v4u32, int_arm_mve_vmull,
4672 int_arm_mve_mull_int_predicated, 0b1,
4673 "@earlyclobber $Qd">;
// Polynomial widening multiplies use the dedicated poly intrinsics.
4675 defm MVE_VMULLBp8 : MVE_VMULL_m<MVE_v16p8, int_arm_mve_vmull_poly,
4676 int_arm_mve_mull_poly_predicated, 0b0>;
4677 defm MVE_VMULLTp8 : MVE_VMULL_m<MVE_v16p8, int_arm_mve_vmull_poly,
4678 int_arm_mve_mull_poly_predicated, 0b1>;
4679 defm MVE_VMULLBp16 : MVE_VMULL_m<MVE_v8p16, int_arm_mve_vmull_poly,
4680 int_arm_mve_mull_poly_predicated, 0b0>;
4681 defm MVE_VMULLTp16 : MVE_VMULL_m<MVE_v8p16, int_arm_mve_vmull_poly,
4682 int_arm_mve_mull_poly_predicated, 0b1>;
// Additional ISel patterns matching generic widening-multiply DAG shapes
// (ARMvmulls/ARMvmullu nodes, mul of sign-extended-in-register values, and
// mul of masked/bic'd values) onto the VMULLB/VMULLT instructions. The
// "T" forms match inputs whose lanes have been swapped by an ARMvrev node,
// i.e. the odd lanes moved into the even positions.
4684 let Predicates = [HasMVEInt] in {
4685 def : Pat<(v2i64 (ARMvmulls (v4i32 MQPR:$src1), (v4i32 MQPR:$src2))),
4686 (MVE_VMULLBs32 MQPR:$src1, MQPR:$src2)>;
4687 def : Pat<(v2i64 (ARMvmulls (v4i32 (ARMvrev64 (v4i32 MQPR:$src1))),
4688 (v4i32 (ARMvrev64 (v4i32 MQPR:$src2))))),
4689 (MVE_VMULLTs32 MQPR:$src1, MQPR:$src2)>;
// Signed: sext_inreg of the low half of each lane selects the B form.
4691 def : Pat<(mul (sext_inreg (v4i32 MQPR:$src1), v4i16),
4692 (sext_inreg (v4i32 MQPR:$src2), v4i16)),
4693 (MVE_VMULLBs16 MQPR:$src1, MQPR:$src2)>;
4694 def : Pat<(mul (sext_inreg (v4i32 (ARMVectorRegCast (ARMvrev32 (v8i16 MQPR:$src1)))), v4i16),
4695 (sext_inreg (v4i32 (ARMVectorRegCast (ARMvrev32 (v8i16 MQPR:$src2)))), v4i16)),
4696 (MVE_VMULLTs16 MQPR:$src1, MQPR:$src2)>;
4698 def : Pat<(mul (sext_inreg (v8i16 MQPR:$src1), v8i8),
4699 (sext_inreg (v8i16 MQPR:$src2), v8i8)),
4700 (MVE_VMULLBs8 MQPR:$src1, MQPR:$src2)>;
4701 def : Pat<(mul (sext_inreg (v8i16 (ARMVectorRegCast (ARMvrev16 (v16i8 MQPR:$src1)))), v8i8),
4702 (sext_inreg (v8i16 (ARMVectorRegCast (ARMvrev16 (v16i8 MQPR:$src2)))), v8i8)),
4703 (MVE_VMULLTs8 MQPR:$src1, MQPR:$src2)>;
4705 def : Pat<(v2i64 (ARMvmullu (v4i32 MQPR:$src1), (v4i32 MQPR:$src2))),
4706 (MVE_VMULLBu32 MQPR:$src1, MQPR:$src2)>;
4707 def : Pat<(v2i64 (ARMvmullu (v4i32 (ARMvrev64 (v4i32 MQPR:$src1))),
4708 (v4i32 (ARMvrev64 (v4i32 MQPR:$src2))))),
4709 (MVE_VMULLTu32 MQPR:$src1, MQPR:$src2)>;
// Unsigned: an AND with an ARMvmovImm / a VBIC immediate isolates the low
// half of each lane. NOTE(review): 0xCFF and 0xAFF appear to be the
// *encoded* vmov/vbic immediate forms, not literal masks — confirm against
// the ARMvmovImm/ARMvbicImm encoding before changing them.
4711 def : Pat<(mul (and (v4i32 MQPR:$src1), (v4i32 (ARMvmovImm (i32 0xCFF)))),
4712 (and (v4i32 MQPR:$src2), (v4i32 (ARMvmovImm (i32 0xCFF))))),
4713 (MVE_VMULLBu16 MQPR:$src1, MQPR:$src2)>;
4714 def : Pat<(mul (and (v4i32 (ARMVectorRegCast (ARMvrev32 (v8i16 MQPR:$src1)))),
4715 (v4i32 (ARMvmovImm (i32 0xCFF)))),
4716 (and (v4i32 (ARMVectorRegCast (ARMvrev32 (v8i16 MQPR:$src2)))),
4717 (v4i32 (ARMvmovImm (i32 0xCFF))))),
4718 (MVE_VMULLTu16 MQPR:$src1, MQPR:$src2)>;
4720 def : Pat<(mul (ARMvbicImm (v8i16 MQPR:$src1), (i32 0xAFF)),
4721 (ARMvbicImm (v8i16 MQPR:$src2), (i32 0xAFF))),
4722 (MVE_VMULLBu8 MQPR:$src1, MQPR:$src2)>;
4723 def : Pat<(mul (ARMvbicImm (v8i16 (ARMVectorRegCast (ARMvrev16 (v16i8 MQPR:$src1)))), (i32 0xAFF)),
4724 (ARMvbicImm (v8i16 (ARMVectorRegCast (ARMvrev16 (v16i8 MQPR:$src2)))), (i32 0xAFF))),
4725 (MVE_VMULLTu8 MQPR:$src1, MQPR:$src2)>;
// MVE_VxMULH: vector multiply returning the high half of each product.
// `round` (Inst{12}) selects the rounding variant (vrmulh vs vmulh);
// `U` is the signedness bit.
4728 class MVE_VxMULH<string iname, string suffix, bit U, bits<2> size, bit round,
4729 list<dag> pattern=[]>
4730 : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
4731 (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
4732 vpred_r, "", pattern> {
4736 let Inst{21-20} = size;
4737 let Inst{19-17} = Qn{2-0};
4739 let Inst{12} = round;
4741 let Inst{7} = Qn{3};
4743 let validForTailPredication = 1;
// MVE_VxMULH_m: instruction plus patterns. The non-rounding form also
// matches the generic mulhu/mulhs DAG nodes via MVE_TwoOpPattern; all
// forms match the unpredicated and predicated intrinsics.
4746 multiclass MVE_VxMULH_m<string iname, MVEVectorVTInfo VTI, SDNode unpred_op,
4747 Intrinsic PredInt, bit round> {
4748 def "" : MVE_VxMULH<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, round>;
4749 defvar Inst = !cast<Instruction>(NAME);
4751 let Predicates = [HasMVEInt] in {
4752 if !eq(round, 0b0) then {
4753 defvar mulh = !if(VTI.Unsigned, mulhu, mulhs);
4754 defm : MVE_TwoOpPattern<VTI, mulh, PredInt, (? (i32 VTI.Unsigned)),
4755 !cast<Instruction>(NAME)>;
4757 // Predicated multiply returning high bits
4758 def : Pat<(VTI.Vec (PredInt (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
4759 (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
4760 (VTI.Vec MQPR:$inactive))),
4761 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
4762 ARMVCCThen, (VTI.Pred VCCR:$mask),
4763 (VTI.Vec MQPR:$inactive)))>;
4766 // Unpredicated intrinsic
4767 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
4768 (i32 VTI.Unsigned))),
4769 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
// MVE_VMULT: convenience wrapper picking the (v)rmulh intrinsics by `round`.
4773 multiclass MVE_VMULT<string iname, MVEVectorVTInfo VTI, bit round>
4774 : MVE_VxMULH_m<iname, VTI, !if(round, int_arm_mve_vrmulh, int_arm_mve_vmulh),
4775 !if(round, int_arm_mve_rmulh_predicated,
4776 int_arm_mve_mulh_predicated),
// vmulh (truncating) and vrmulh (rounding) for all element types.
4779 defm MVE_VMULHs8 : MVE_VMULT<"vmulh", MVE_v16s8, 0b0>;
4780 defm MVE_VMULHs16 : MVE_VMULT<"vmulh", MVE_v8s16, 0b0>;
4781 defm MVE_VMULHs32 : MVE_VMULT<"vmulh", MVE_v4s32, 0b0>;
4782 defm MVE_VMULHu8 : MVE_VMULT<"vmulh", MVE_v16u8, 0b0>;
4783 defm MVE_VMULHu16 : MVE_VMULT<"vmulh", MVE_v8u16, 0b0>;
4784 defm MVE_VMULHu32 : MVE_VMULT<"vmulh", MVE_v4u32, 0b0>;
4786 defm MVE_VRMULHs8 : MVE_VMULT<"vrmulh", MVE_v16s8, 0b1>;
4787 defm MVE_VRMULHs16 : MVE_VMULT<"vrmulh", MVE_v8s16, 0b1>;
4788 defm MVE_VRMULHs32 : MVE_VMULT<"vrmulh", MVE_v4s32, 0b1>;
4789 defm MVE_VRMULHu8 : MVE_VMULT<"vrmulh", MVE_v16u8, 0b1>;
4790 defm MVE_VRMULHu16 : MVE_VMULT<"vrmulh", MVE_v8u16, 0b1>;
4791 defm MVE_VRMULHu32 : MVE_VMULT<"vrmulh", MVE_v4u32, 0b1>;
// MVE_VxMOVxN: narrowing-move family (vmovn / vqmovn / vqmovun). `T`
// selects writing the top vs bottom half-lanes of the destination; the
// untouched half of $Qd is preserved ($Qd = $Qd_src tie,
// retainsPreviousHalfElement = 1).
4793 class MVE_VxMOVxN<string iname, string suffix, bit bit_28, bit bit_17,
4794 bits<2> size, bit T, list<dag> pattern=[]>
4795 : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
4796 (ins MQPR:$Qd_src, MQPR:$Qm), "$Qd, $Qm",
4797 vpred_n, "$Qd = $Qd_src", pattern> {
4799 let Inst{28} = bit_28;
4800 let Inst{21-20} = 0b11;
4801 let Inst{19-18} = size;
4802 let Inst{17} = bit_17;
4806 let Inst{7} = !not(bit_17);
4808 let validForTailPredication = 1;
4809 let retainsPreviousHalfElement = 1;
// Emit the bottom-half ("b") and top-half ("t") instructions as a pair.
4812 multiclass MVE_VxMOVxN_halves<string iname, string suffix,
4813 bit bit_28, bit bit_17, bits<2> size> {
4814 def bh : MVE_VxMOVxN<iname # "b", suffix, bit_28, bit_17, size, 0b0>;
4815 def th : MVE_VxMOVxN<iname # "t", suffix, bit_28, bit_17, size, 0b1>;
// Plain, saturating signed/unsigned, and signed->unsigned saturating
// narrowing moves, for 16- and 32-bit source lanes.
4818 defm MVE_VMOVNi16 : MVE_VxMOVxN_halves<"vmovn", "i16", 0b1, 0b0, 0b00>;
4819 defm MVE_VMOVNi32 : MVE_VxMOVxN_halves<"vmovn", "i32", 0b1, 0b0, 0b01>;
4820 defm MVE_VQMOVNs16 : MVE_VxMOVxN_halves<"vqmovn", "s16", 0b0, 0b1, 0b00>;
4821 defm MVE_VQMOVNs32 : MVE_VxMOVxN_halves<"vqmovn", "s32", 0b0, 0b1, 0b01>;
4822 defm MVE_VQMOVNu16 : MVE_VxMOVxN_halves<"vqmovn", "u16", 0b1, 0b1, 0b00>;
4823 defm MVE_VQMOVNu32 : MVE_VxMOVxN_halves<"vqmovn", "u32", 0b1, 0b1, 0b01>;
4824 defm MVE_VQMOVUNs16 : MVE_VxMOVxN_halves<"vqmovun", "s16", 0b0, 0b0, 0b00>;
4825 defm MVE_VQMOVUNs32 : MVE_VxMOVxN_halves<"vqmovun", "s32", 0b0, 0b0, 0b01>;
4827 def MVEvmovn : SDNode<"ARMISD::VMOVN", SDTARMVEXT>;
4829 multiclass MVE_VMOVN_p<Instruction Inst, bit top,
4830 MVEVectorVTInfo VTI, MVEVectorVTInfo InVTI> {
4831 // Match the most obvious MVEvmovn(a,b,t), which overwrites the odd or even
4832 // lanes of a (depending on t) with the even lanes of b.
4833 def : Pat<(VTI.Vec (MVEvmovn (VTI.Vec MQPR:$Qd_src),
4834 (VTI.Vec MQPR:$Qm), (i32 top))),
4835 (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qm)))>;
4838 // If we see MVEvmovn(a,ARMvrev(b),1), that wants to overwrite the odd
4839 // lanes of a with the odd lanes of b. In other words, the lanes we're
4840 // _keeping_ from a are the even ones. So we can flip it round and say that
4841 // this is the same as overwriting the even lanes of b with the even lanes
4842 // of a, i.e. it's a VMOVNB with the operands reversed.
4843 defvar vrev = !cast<SDNode>("ARMvrev" # InVTI.LaneBits);
4844 def : Pat<(VTI.Vec (MVEvmovn (VTI.Vec MQPR:$Qm),
4845 (VTI.Vec (vrev MQPR:$Qd_src)), (i32 1))),
4846 (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qm)))>;
4849 // Match the IR intrinsic for a predicated VMOVN. This regards the Qm input
4850 // as having wider lanes that we're narrowing, instead of already-narrow
4851 // lanes that we're taking every other one of.
4852 def : Pat<(VTI.Vec (int_arm_mve_vmovn_predicated (VTI.Vec MQPR:$Qd_src),
4853 (InVTI.Vec MQPR:$Qm), (i32 top),
4854 (InVTI.Pred VCCR:$pred))),
4855 (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
4856 (InVTI.Vec MQPR:$Qm),
4857 ARMVCCThen, (InVTI.Pred VCCR:$pred)))>;
4860 defm : MVE_VMOVN_p<MVE_VMOVNi32bh, 0, MVE_v8i16, MVE_v4i32>;
4861 defm : MVE_VMOVN_p<MVE_VMOVNi32th, 1, MVE_v8i16, MVE_v4i32>;
4862 defm : MVE_VMOVN_p<MVE_VMOVNi16bh, 0, MVE_v16i8, MVE_v8i16>;
4863 defm : MVE_VMOVN_p<MVE_VMOVNi16th, 1, MVE_v16i8, MVE_v8i16>;
// MVE_VQMOVN_p: patterns selecting the saturating-narrow intrinsics
// (unpredicated and predicated) into one VQMOVN/VQMOVUN instruction.
// The intrinsic's outU/inU/top constants pick which instruction matches.
4865 multiclass MVE_VQMOVN_p<Instruction Inst, bit outU, bit inU, bit top,
4866 MVEVectorVTInfo VTI, MVEVectorVTInfo InVTI> {
4867 def : Pat<(VTI.Vec (int_arm_mve_vqmovn (VTI.Vec MQPR:$Qd_src),
4868 (InVTI.Vec MQPR:$Qm),
4869 (i32 outU), (i32 inU), (i32 top))),
4870 (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
4871 (InVTI.Vec MQPR:$Qm)))>;
4873 def : Pat<(VTI.Vec (int_arm_mve_vqmovn_predicated (VTI.Vec MQPR:$Qd_src),
4874 (InVTI.Vec MQPR:$Qm),
4875 (i32 outU), (i32 inU), (i32 top),
4876 (InVTI.Pred VCCR:$pred))),
4877 (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
4878 (InVTI.Vec MQPR:$Qm),
4879 ARMVCCThen, (InVTI.Pred VCCR:$pred)))>;
// outU/inU combinations: s->s (0,0), u->u (1,1), and s->u (1,0) = VQMOVUN.
4882 defm : MVE_VQMOVN_p<MVE_VQMOVNs32bh, 0, 0, 0, MVE_v8i16, MVE_v4i32>;
4883 defm : MVE_VQMOVN_p<MVE_VQMOVNs32th, 0, 0, 1, MVE_v8i16, MVE_v4i32>;
4884 defm : MVE_VQMOVN_p<MVE_VQMOVNs16bh, 0, 0, 0, MVE_v16i8, MVE_v8i16>;
4885 defm : MVE_VQMOVN_p<MVE_VQMOVNs16th, 0, 0, 1, MVE_v16i8, MVE_v8i16>;
4886 defm : MVE_VQMOVN_p<MVE_VQMOVNu32bh, 1, 1, 0, MVE_v8i16, MVE_v4i32>;
4887 defm : MVE_VQMOVN_p<MVE_VQMOVNu32th, 1, 1, 1, MVE_v8i16, MVE_v4i32>;
4888 defm : MVE_VQMOVN_p<MVE_VQMOVNu16bh, 1, 1, 0, MVE_v16i8, MVE_v8i16>;
4889 defm : MVE_VQMOVN_p<MVE_VQMOVNu16th, 1, 1, 1, MVE_v16i8, MVE_v8i16>;
4890 defm : MVE_VQMOVN_p<MVE_VQMOVUNs32bh, 1, 0, 0, MVE_v8i16, MVE_v4i32>;
4891 defm : MVE_VQMOVN_p<MVE_VQMOVUNs32th, 1, 0, 1, MVE_v8i16, MVE_v4i32>;
4892 defm : MVE_VQMOVN_p<MVE_VQMOVUNs16bh, 1, 0, 0, MVE_v16i8, MVE_v8i16>;
4893 defm : MVE_VQMOVN_p<MVE_VQMOVUNs16th, 1, 0, 1, MVE_v16i8, MVE_v8i16>;
// DAG nodes for saturating narrows produced by lowering; the i32 operand
// selects the top/bottom half.
4895 def SDTARMVMOVNQ : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
4896 SDTCisVec<2>, SDTCisVT<3, i32>]>;
4897 def MVEvqmovns : SDNode<"ARMISD::VQMOVNs", SDTARMVMOVNQ>;
4898 def MVEvqmovnu : SDNode<"ARMISD::VQMOVNu", SDTARMVMOVNQ>;
// Patterns selecting the MVEvqmovns/MVEvqmovnu DAG nodes into VQMOVN
// instructions; plus combined patterns where a preceding immediate shift
// right (ARMvshrsImm/ARMvshruImm) folds with the saturating narrow into a
// single VQSHRN (saturating shift-right-narrow).
4900 let Predicates = [HasMVEInt] in {
4901 def : Pat<(v8i16 (MVEvqmovns (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), (i32 0))),
4902 (v8i16 (MVE_VQMOVNs32bh (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm)))>;
4903 def : Pat<(v8i16 (MVEvqmovns (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), (i32 1))),
4904 (v8i16 (MVE_VQMOVNs32th (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm)))>;
4905 def : Pat<(v16i8 (MVEvqmovns (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 0))),
4906 (v16i8 (MVE_VQMOVNs16bh (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
4907 def : Pat<(v16i8 (MVEvqmovns (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))),
4908 (v16i8 (MVE_VQMOVNs16th (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
4910 def : Pat<(v8i16 (MVEvqmovnu (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), (i32 0))),
4911 (v8i16 (MVE_VQMOVNu32bh (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm)))>;
4912 def : Pat<(v8i16 (MVEvqmovnu (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), (i32 1))),
4913 (v8i16 (MVE_VQMOVNu32th (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm)))>;
4914 def : Pat<(v16i8 (MVEvqmovnu (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 0))),
4915 (v16i8 (MVE_VQMOVNu16bh (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
4916 def : Pat<(v16i8 (MVEvqmovnu (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))),
4917 (v16i8 (MVE_VQMOVNu16th (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
// Fold a signed shift-right immediate into the narrow: VQSHRN.
4919 def : Pat<(v8i16 (MVEvqmovns (v8i16 MQPR:$Qd_src), (v4i32 (ARMvshrsImm (v4i32 MQPR:$Qm), imm0_31:$imm)), (i32 0))),
4920 (v8i16 (MVE_VQSHRNbhs32 (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), imm0_31:$imm))>;
4921 def : Pat<(v16i8 (MVEvqmovns (v16i8 MQPR:$Qd_src), (v8i16 (ARMvshrsImm (v8i16 MQPR:$Qm), imm0_15:$imm)), (i32 0))),
4922 (v16i8 (MVE_VQSHRNbhs16 (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), imm0_15:$imm))>;
4923 def : Pat<(v8i16 (MVEvqmovns (v8i16 MQPR:$Qd_src), (v4i32 (ARMvshrsImm (v4i32 MQPR:$Qm), imm0_31:$imm)), (i32 1))),
4924 (v8i16 (MVE_VQSHRNths32 (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), imm0_31:$imm))>;
4925 def : Pat<(v16i8 (MVEvqmovns (v16i8 MQPR:$Qd_src), (v8i16 (ARMvshrsImm (v8i16 MQPR:$Qm), imm0_15:$imm)), (i32 1))),
4926 (v16i8 (MVE_VQSHRNths16 (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), imm0_15:$imm))>;
// Unsigned variants.
4928 def : Pat<(v8i16 (MVEvqmovnu (v8i16 MQPR:$Qd_src), (v4i32 (ARMvshruImm (v4i32 MQPR:$Qm), imm0_31:$imm)), (i32 0))),
4929 (v8i16 (MVE_VQSHRNbhu32 (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), imm0_31:$imm))>;
4930 def : Pat<(v16i8 (MVEvqmovnu (v16i8 MQPR:$Qd_src), (v8i16 (ARMvshruImm (v8i16 MQPR:$Qm), imm0_15:$imm)), (i32 0))),
4931 (v16i8 (MVE_VQSHRNbhu16 (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), imm0_15:$imm))>;
4932 def : Pat<(v8i16 (MVEvqmovnu (v8i16 MQPR:$Qd_src), (v4i32 (ARMvshruImm (v4i32 MQPR:$Qm), imm0_31:$imm)), (i32 1))),
4933 (v8i16 (MVE_VQSHRNthu32 (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), imm0_31:$imm))>;
4934 def : Pat<(v16i8 (MVEvqmovnu (v16i8 MQPR:$Qd_src), (v8i16 (ARMvshruImm (v8i16 MQPR:$Qm), imm0_15:$imm)), (i32 1))),
4935 (v16i8 (MVE_VQSHRNthu16 (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), imm0_15:$imm))>;
// MVE_VCVT_ff: f16 <-> f32 conversions (vcvtb/vcvtt). `T` selects the
// top vs bottom half-lanes; the narrowing direction preserves the other
// half of the destination (retainsPreviousHalfElement = 1). Float-only.
4938 class MVE_VCVT_ff<string iname, string suffix, bit op, bit T,
4939 dag iops_extra, vpred_ops vpred, string cstr>
4940 : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
4941 !con(iops_extra, (ins MQPR:$Qm)), "$Qd, $Qm",
4944 let Inst{21-16} = 0b111111;
4946 let Inst{8-7} = 0b00;
4949 let Predicates = [HasMVEFloat];
4950 let retainsPreviousHalfElement = 1;
// DAG nodes for the narrowing (VCVTN) and widening (VCVTL) conversions.
4953 def SDTARMVCVTL : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
4955 def MVEvcvtn : SDNode<"ARMISD::VCVTN", SDTARMVMOVNQ>;
4956 def MVEvcvtl : SDNode<"ARMISD::VCVTL", SDTARMVCVTL>;
// MVE_VCVT_f2h_m: f32 -> f16 narrowing conversion; the destination is tied
// to $Qd_src so the half-lanes not written are kept.
4958 multiclass MVE_VCVT_f2h_m<string iname, int half> {
4959 def "": MVE_VCVT_ff<iname, "f16.f32", 0b0, half,
4960 (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
4961 defvar Inst = !cast<Instruction>(NAME);
4963 let Predicates = [HasMVEFloat] in {
4964 def : Pat<(v8f16 (int_arm_mve_vcvt_narrow
4965 (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 half))),
4966 (v8f16 (Inst (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm)))>;
4967 def : Pat<(v8f16 (int_arm_mve_vcvt_narrow_predicated
4968 (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 half),
4969 (v4i1 VCCR:$mask))),
4970 (v8f16 (Inst (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm),
4971 ARMVCCThen, (v4i1 VCCR:$mask)))>;
4973 def : Pat<(v8f16 (MVEvcvtn (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 half))),
4974 (v8f16 (Inst (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm)))>;
// MVE_VCVT_h2f_m: f16 -> f32 widening conversion; no tied source needed.
4978 multiclass MVE_VCVT_h2f_m<string iname, int half> {
4979 def "": MVE_VCVT_ff<iname, "f32.f16", 0b1, half, (ins), vpred_r, "">;
4980 defvar Inst = !cast<Instruction>(NAME);
4982 let Predicates = [HasMVEFloat] in {
4983 def : Pat<(v4f32 (int_arm_mve_vcvt_widen (v8f16 MQPR:$Qm), (i32 half))),
4984 (v4f32 (Inst (v8f16 MQPR:$Qm)))>;
4985 def : Pat<(v4f32 (int_arm_mve_vcvt_widen_predicated
4986 (v4f32 MQPR:$inactive), (v8f16 MQPR:$Qm), (i32 half),
4987 (v4i1 VCCR:$mask))),
4988 (v4f32 (Inst (v8f16 MQPR:$Qm), ARMVCCThen,
4989 (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive)))>;
4991 def : Pat<(v4f32 (MVEvcvtl (v8f16 MQPR:$Qm), (i32 half))),
4992 (v4f32 (Inst (v8f16 MQPR:$Qm)))>;
// vcvtb operates on the bottom half-lanes, vcvtt on the top.
4996 defm MVE_VCVTf16f32bh : MVE_VCVT_f2h_m<"vcvtb", 0b0>;
4997 defm MVE_VCVTf16f32th : MVE_VCVT_f2h_m<"vcvtt", 0b1>;
4998 defm MVE_VCVTf32f16bh : MVE_VCVT_h2f_m<"vcvtb", 0b0>;
4999 defm MVE_VCVTf32f16th : MVE_VCVT_h2f_m<"vcvtt", 0b1>;
// MVE_VxCADD: complex add with rotation (vcadd / vhcadd). The rotation is
// restricted to the odd values (complexrotateopodd). Inst{28} holds the
// `halve` parameter — note the instantiations below pass 1 for vcadd and
// 0 for vhcadd, so the bit distinguishes the two encodings rather than
// literally meaning "halving"; confirm against the encoding if changing.
5001 class MVE_VxCADD<string iname, string suffix, bits<2> size, bit halve,
5003 : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
5004 (ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
5005 "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> {
5009 let Inst{28} = halve;
5010 let Inst{21-20} = size;
5011 let Inst{19-17} = Qn{2-0};
5015 let Inst{7} = Qn{3};
// MVE_VxCADD_m: instruction plus patterns for the vcaddq intrinsics,
// unpredicated and predicated (mask + $inactive).
5019 multiclass MVE_VxCADD_m<string iname, MVEVectorVTInfo VTI,
5020 bit halve, string cstr=""> {
5021 def "" : MVE_VxCADD<iname, VTI.Suffix, VTI.Size, halve, cstr>;
5022 defvar Inst = !cast<Instruction>(NAME);
5024 let Predicates = [HasMVEInt] in {
5025 def : Pat<(VTI.Vec (int_arm_mve_vcaddq halve,
5026 imm:$rot, (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
5027 (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
5030 def : Pat<(VTI.Vec (int_arm_mve_vcaddq_predicated halve,
5031 imm:$rot, (VTI.Vec MQPR:$inactive),
5032 (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
5033 (VTI.Pred VCCR:$mask))),
5034 (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
5035 imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask),
5036 (VTI.Vec MQPR:$inactive)))>;
// vcadd (integer) and vhcadd (signed halving); the 32-bit forms need
// @earlyclobber on the destination.
5041 defm MVE_VCADDi8 : MVE_VxCADD_m<"vcadd", MVE_v16i8, 0b1>;
5042 defm MVE_VCADDi16 : MVE_VxCADD_m<"vcadd", MVE_v8i16, 0b1>;
5043 defm MVE_VCADDi32 : MVE_VxCADD_m<"vcadd", MVE_v4i32, 0b1, "@earlyclobber $Qd">;
5045 defm MVE_VHCADDs8 : MVE_VxCADD_m<"vhcadd", MVE_v16s8, 0b0>;
5046 defm MVE_VHCADDs16 : MVE_VxCADD_m<"vhcadd", MVE_v8s16, 0b0>;
5047 defm MVE_VHCADDs32 : MVE_VxCADD_m<"vhcadd", MVE_v4s32, 0b0, "@earlyclobber $Qd">;
// MVE_VADCSBC: add/subtract with carry through FPSCR. The plain forms
// (vadc/vsbc) consume a carry-in operand; the "I" forms (vadci/vsbci)
// take no carry-in (empty `carryin` dag). All produce a carry-out.
5049 class MVE_VADCSBC<string iname, bit I, bit subtract,
5050 dag carryin, list<dag> pattern=[]>
5051 : MVE_qDest_qSrc<iname, "i32", (outs MQPR:$Qd, cl_FPSCR_NZCV:$carryout),
5052 !con((ins MQPR:$Qn, MQPR:$Qm), carryin),
5053 "$Qd, $Qn, $Qm", vpred_r, "", pattern> {
5056 let Inst{28} = subtract;
5057 let Inst{21-20} = 0b11;
5058 let Inst{19-17} = Qn{2-0};
5062 let Inst{7} = Qn{3};
5065 // Custom decoder method in order to add the FPSCR operand(s), which
5066 // Tablegen won't do right
5067 let DecoderMethod = "DecodeMVEVADCInstruction";
5070 def MVE_VADC : MVE_VADCSBC<"vadc", 0b0, 0b0, (ins cl_FPSCR_NZCV:$carryin)>;
5071 def MVE_VADCI : MVE_VADCSBC<"vadci", 0b1, 0b0, (ins)>;
5073 def MVE_VSBC : MVE_VADCSBC<"vsbc", 0b0, 0b1, (ins cl_FPSCR_NZCV:$carryin)>;
5074 def MVE_VSBCI : MVE_VADCSBC<"vsbci", 0b1, 0b1, (ins)>;
// MVE_VQDMULL: saturating doubling widening multiply. `T` selects
// bottom ("vqdmullb") vs top ("vqdmullt") input half-lanes; the result
// has double-width lanes (doubleWidthResult = 1).
5076 class MVE_VQDMULL<string iname, string suffix, bit size, bit T,
5077 string cstr="", list<dag> pattern=[]>
5078 : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
5079 (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
5080 vpred_r, cstr, pattern> {
5083 let Inst{28} = size;
5084 let Inst{21-20} = 0b11;
5085 let Inst{19-17} = Qn{2-0};
5089 let Inst{7} = Qn{3};
5091 let validForTailPredication = 1;
5092 let doubleWidthResult = 1;
// MVE_VQDMULL_m: instruction plus patterns for the vqdmull intrinsics.
5095 multiclass MVE_VQDMULL_m<string iname, MVEVectorVTInfo VTI, bit size, bit T,
5097 def "" : MVE_VQDMULL<iname, VTI.Suffix, size, T, cstr>;
5098 defvar Inst = !cast<Instruction>(NAME);
5100 let Predicates = [HasMVEInt] in {
5101 // Unpredicated saturating multiply
5102 def : Pat<(VTI.DblVec (int_arm_mve_vqdmull (VTI.Vec MQPR:$Qm),
5103 (VTI.Vec MQPR:$Qn), (i32 T))),
5104 (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
5105 // Predicated saturating multiply
5106 def : Pat<(VTI.DblVec (int_arm_mve_vqdmull_predicated
5107 (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
5108 (i32 T), (VTI.DblPred VCCR:$mask),
5109 (VTI.DblVec MQPR:$inactive))),
5110 (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
5111 ARMVCCThen, (VTI.DblPred VCCR:$mask),
5112 (VTI.DblVec MQPR:$inactive)))>;
// Emit bottom/top pairs; the s32 form needs @earlyclobber on $Qd.
5116 multiclass MVE_VQDMULL_halves<MVEVectorVTInfo VTI, bit size, string cstr=""> {
5117 defm bh : MVE_VQDMULL_m<"vqdmullb", VTI, size, 0b0, cstr>;
5118 defm th : MVE_VQDMULL_m<"vqdmullt", VTI, size, 0b1, cstr>;
5121 defm MVE_VQDMULLs16 : MVE_VQDMULL_halves<MVE_v8s16, 0b0>;
5122 defm MVE_VQDMULLs32 : MVE_VQDMULL_halves<MVE_v4s32, 0b1, "@earlyclobber $Qd">;
5124 // end of mve_qDest_qSrc
5126 // start of mve_qDest_rSrc
// MVE_qr_base: common base for instructions taking a vector destination
// and a scalar GPR source operand Rm; places the split Qd/Qn register
// fields and Rm in bits 3-0.
5128 class MVE_qr_base<dag oops, dag iops, InstrItinClass itin, string iname,
5129 string suffix, string ops, vpred_ops vpred, string cstr,
5130 list<dag> pattern=[]>
5131 : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
5136 let Inst{25-23} = 0b100;
5137 let Inst{22} = Qd{3};
5138 let Inst{19-17} = Qn{2-0};
5139 let Inst{15-13} = Qd{2-0};
5140 let Inst{11-9} = 0b111;
5141 let Inst{7} = Qn{3};
5144 let Inst{3-0} = Rm{3-0};
// Three operand shapes: plain dest, dest tied to a source accumulator,
// and single-vector-input with a tied dest.
5147 class MVE_qDest_rSrc<string iname, string suffix, string cstr="", list<dag> pattern=[]>
5148 : MVE_qr_base<(outs MQPR:$Qd), (ins MQPR:$Qn, rGPR:$Rm),
5149 NoItinerary, iname, suffix, "$Qd, $Qn, $Rm", vpred_r, cstr,
5152 class MVE_qDestSrc_rSrc<string iname, string suffix, list<dag> pattern=[]>
5153 : MVE_qr_base<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qn, rGPR:$Rm),
5154 NoItinerary, iname, suffix, "$Qd, $Qn, $Rm", vpred_n, "$Qd = $Qd_src",
5157 class MVE_qDest_single_rSrc<string iname, string suffix, list<dag> pattern=[]>
5158 : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qd_src, rGPR:$Rm), NoItinerary, iname,
5159 suffix, "$Qd, $Rm", vpred_n, "$Qd = $Qd_src", pattern> {
5163 let Inst{22} = Qd{3};
5164 let Inst{15-13} = Qd{2-0};
5165 let Inst{3-0} = Rm{3-0};
5168 // Patterns for vector-scalar instructions with integer operands
// The scalar side is matched as a vector-wide duplicate (ARMvdup) of a
// GPR value. The optional sign flag is appended to the intrinsic call
// only when the corresponding *_has_sign parameter is set.
5169 multiclass MVE_vec_scalar_int_pat_m<Instruction inst, MVEVectorVTInfo VTI,
5170 SDPatternOperator unpred_op,
5171 SDPatternOperator pred_op,
5172 bit unpred_has_sign = 0,
5173 bit pred_has_sign = 0> {
5174 defvar UnpredSign = !if(unpred_has_sign, (? (i32 VTI.Unsigned)), (?));
5175 defvar PredSign = !if(pred_has_sign, (? (i32 VTI.Unsigned)), (?));
5177 let Predicates = [HasMVEInt] in {
5178 // Unpredicated version
5179 def : Pat<(VTI.Vec !con((unpred_op (VTI.Vec MQPR:$Qm),
5180 (VTI.Vec (ARMvdup rGPR:$val))),
5182 (VTI.Vec (inst (VTI.Vec MQPR:$Qm), (i32 rGPR:$val)))>;
5183 // Predicated version
5184 def : Pat<(VTI.Vec !con((pred_op (VTI.Vec MQPR:$Qm),
5185 (VTI.Vec (ARMvdup rGPR:$val))),
5187 (pred_op (VTI.Pred VCCR:$mask),
5188 (VTI.Vec MQPR:$inactive)))),
5189 (VTI.Vec (inst (VTI.Vec MQPR:$Qm), (i32 rGPR:$val),
5190 ARMVCCThen, (VTI.Pred VCCR:$mask),
5191 (VTI.Vec MQPR:$inactive)))>;
// Encoding class shared by the scalar add/sub family (VADD/VSUB and,
// with bit_5 set, the saturating VQADD/VQSUB variants — see uses below).
// bit_12 selects subtract, size goes in Inst{21-20}.
5195 class MVE_VADDSUB_qr<string iname, string suffix, bits<2> size,
5196 bit bit_5, bit bit_12, bit bit_16, bit bit_28>
5197 : MVE_qDest_rSrc<iname, suffix, ""> {
5199 let Inst{28} = bit_28;
5200 let Inst{21-20} = size;
5201 let Inst{16} = bit_16;
5202 let Inst{12} = bit_12;
5204 let Inst{5} = bit_5;
5205 let validForTailPredication = 1;
5208 // Vector-scalar add/sub
// Instruction plus ISel patterns; MVE_TwoOpPatternDup handles the
// (op vector, vdup(scalar)) matching. ARMimmAllZerosV is passed as the
// identity value for the predicated pattern.
5209 multiclass MVE_VADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract,
5210 SDNode Op, Intrinsic PredInt> {
5211 def "" : MVE_VADDSUB_qr<iname, VTI.Suffix, VTI.Size, 0b0, subtract, 0b1, 0b0>;
5212 let Predicates = [HasMVEInt] in {
5213 defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME), ARMimmAllZerosV>;
5217 multiclass MVE_VADD_qr_m<MVEVectorVTInfo VTI>
5218 : MVE_VADDSUB_qr_m<"vadd", VTI, 0b0, add, int_arm_mve_add_predicated>;
5220 multiclass MVE_VSUB_qr_m<MVEVectorVTInfo VTI>
5221 : MVE_VADDSUB_qr_m<"vsub", VTI, 0b1, sub, int_arm_mve_sub_predicated>;
// Instantiations for all three integer lane sizes (sign-agnostic ops,
// hence the "i" suffixes).
5223 defm MVE_VADD_qr_i8 : MVE_VADD_qr_m<MVE_v16i8>;
5224 defm MVE_VADD_qr_i16 : MVE_VADD_qr_m<MVE_v8i16>;
5225 defm MVE_VADD_qr_i32 : MVE_VADD_qr_m<MVE_v4i32>;
5227 defm MVE_VSUB_qr_i8 : MVE_VSUB_qr_m<MVE_v16i8>;
5228 defm MVE_VSUB_qr_i16 : MVE_VSUB_qr_m<MVE_v8i16>;
5229 defm MVE_VSUB_qr_i32 : MVE_VSUB_qr_m<MVE_v4i32>;
5231 // Vector-scalar saturating add/sub
// Same skeleton as MVE_VADDSUB_qr_m but bit_5 is set (saturating form)
// and the predicated intrinsic takes an extra unsigned flag, supplied
// here as the (? (i32 VTI.Unsigned)) extra-args dag.
5232 multiclass MVE_VQADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract,
5233 SDNode Op, Intrinsic PredInt> {
5234 def "" : MVE_VADDSUB_qr<iname, VTI.Suffix, VTI.Size, 0b1, subtract,
5237 let Predicates = [HasMVEInt] in {
5238 defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? (i32 VTI.Unsigned)),
5239 !cast<Instruction>(NAME)>;
5243 multiclass MVE_VQADD_qr_m<MVEVectorVTInfo VTI, SDNode Op>
5244 : MVE_VQADDSUB_qr_m<"vqadd", VTI, 0b0, Op, int_arm_mve_qadd_predicated>;
5246 multiclass MVE_VQSUB_qr_m<MVEVectorVTInfo VTI, SDNode Op>
5247 : MVE_VQADDSUB_qr_m<"vqsub", VTI, 0b1, Op, int_arm_mve_qsub_predicated>;
// Saturation is sign-dependent, so separate signed (saddsat/ssubsat) and
// unsigned (uaddsat/usubsat) instantiations per lane size.
5249 defm MVE_VQADD_qr_s8 : MVE_VQADD_qr_m<MVE_v16s8, saddsat>;
5250 defm MVE_VQADD_qr_s16 : MVE_VQADD_qr_m<MVE_v8s16, saddsat>;
5251 defm MVE_VQADD_qr_s32 : MVE_VQADD_qr_m<MVE_v4s32, saddsat>;
5252 defm MVE_VQADD_qr_u8 : MVE_VQADD_qr_m<MVE_v16u8, uaddsat>;
5253 defm MVE_VQADD_qr_u16 : MVE_VQADD_qr_m<MVE_v8u16, uaddsat>;
5254 defm MVE_VQADD_qr_u32 : MVE_VQADD_qr_m<MVE_v4u32, uaddsat>;
5256 defm MVE_VQSUB_qr_s8 : MVE_VQSUB_qr_m<MVE_v16s8, ssubsat>;
5257 defm MVE_VQSUB_qr_s16 : MVE_VQSUB_qr_m<MVE_v8s16, ssubsat>;
5258 defm MVE_VQSUB_qr_s32 : MVE_VQSUB_qr_m<MVE_v4s32, ssubsat>;
5259 defm MVE_VQSUB_qr_u8 : MVE_VQSUB_qr_m<MVE_v16u8, usubsat>;
5260 defm MVE_VQSUB_qr_u16 : MVE_VQSUB_qr_m<MVE_v8u16, usubsat>;
5261 defm MVE_VQSUB_qr_u32 : MVE_VQSUB_qr_m<MVE_v4u32, usubsat>;
// Scalar form of VQDMULLB/VQDMULLT: saturating doubling multiply-long of
// a vector by a GPR scalar, producing a double-width result
// (doubleWidthResult = 1). T selects the top/bottom half variant.
5263 class MVE_VQDMULL_qr<string iname, string suffix, bit size,
5264 bit T, string cstr="", list<dag> pattern=[]>
5265 : MVE_qDest_rSrc<iname, suffix, cstr, pattern> {
5267 let Inst{28} = size;
5268 let Inst{21-20} = 0b11;
5273 let validForTailPredication = 1;
5274 let doubleWidthResult = 1;
// Instruction plus unpredicated/predicated ISel patterns against the
// int_arm_mve_vqdmull[_predicated] intrinsics, matching a vdup'd scalar
// operand. NOTE(review): parts of the pattern dags are missing from this
// dump (e.g. original lines 5286, 5290, 5292).
5277 multiclass MVE_VQDMULL_qr_m<string iname, MVEVectorVTInfo VTI, bit size,
5278 bit T, string cstr> {
5279 def "" : MVE_VQDMULL_qr<iname, VTI.Suffix, size, T, cstr>;
5280 defvar Inst = !cast<Instruction>(NAME);
5282 let Predicates = [HasMVEInt] in {
5283 // Unpredicated saturating multiply
5284 def : Pat<(VTI.DblVec (int_arm_mve_vqdmull (VTI.Vec MQPR:$Qm),
5285 (VTI.Vec (ARMvdup rGPR:$val)),
5287 (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (i32 rGPR:$val)))>;
5288 // Predicated saturating multiply
5289 def : Pat<(VTI.DblVec (int_arm_mve_vqdmull_predicated
5291 (VTI.Vec (ARMvdup rGPR:$val)),
5293 (VTI.DblPred VCCR:$mask),
5294 (VTI.DblVec MQPR:$inactive))),
5295 (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (i32 rGPR:$val),
5296 ARMVCCThen, (VTI.DblPred VCCR:$mask),
5297 (VTI.DblVec MQPR:$inactive)))>;
// Bottom-half ("bh") and top-half ("th") variants as a pair.
5301 multiclass MVE_VQDMULL_qr_halves<MVEVectorVTInfo VTI, bit size, string cstr=""> {
5302 defm bh : MVE_VQDMULL_qr_m<"vqdmullb", VTI, size, 0b0, cstr>;
5303 defm th : MVE_VQDMULL_qr_m<"vqdmullt", VTI, size, 0b1, cstr>;
// Only the s32 form carries @earlyclobber, matching the vector-vector
// MVE_VQDMULLs32 above.
5306 defm MVE_VQDMULL_qr_s16 : MVE_VQDMULL_qr_halves<MVE_v8s16, 0b0>;
5307 defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves<MVE_v4s32, 0b1, "@earlyclobber $Qd">;
// Encoding class used for both the halving add/sub (VHADD/VHSUB, where
// bit_28 carries the unsigned flag) and the float add/sub — see the
// multiclasses below. Inst{12} selects subtract.
5309 class MVE_VxADDSUB_qr<string iname, string suffix,
5310 bit bit_28, bits<2> bits_21_20, bit subtract,
5311 list<dag> pattern=[]>
5312 : MVE_qDest_rSrc<iname, suffix, "", pattern> {
5314 let Inst{28} = bit_28;
5315 let Inst{21-20} = bits_21_20;
5317 let Inst{12} = subtract;
5320 let validForTailPredication = 1;
// Halving add/sub by scalar. Both the unpredicated and predicated
// intrinsics carry an unsigned flag, hence the two trailing 1s passed to
// MVE_vec_scalar_int_pat_m.
5323 multiclass MVE_VHADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract,
5324 Intrinsic unpred_int, Intrinsic pred_int> {
5325 def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, subtract>;
5326 defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME),
5327 VTI, unpred_int, pred_int, 1, 1>;
5330 multiclass MVE_VHADD_qr_m<MVEVectorVTInfo VTI> :
5331 MVE_VHADDSUB_qr_m<"vhadd", VTI, 0b0, int_arm_mve_vhadd,
5332 int_arm_mve_hadd_predicated>;
5334 multiclass MVE_VHSUB_qr_m<MVEVectorVTInfo VTI> :
5335 MVE_VHADDSUB_qr_m<"vhsub", VTI, 0b1, int_arm_mve_vhsub,
5336 int_arm_mve_hsub_predicated>;
// Signed and unsigned instantiations per lane size.
5338 defm MVE_VHADD_qr_s8 : MVE_VHADD_qr_m<MVE_v16s8>;
5339 defm MVE_VHADD_qr_s16 : MVE_VHADD_qr_m<MVE_v8s16>;
5340 defm MVE_VHADD_qr_s32 : MVE_VHADD_qr_m<MVE_v4s32>;
5341 defm MVE_VHADD_qr_u8 : MVE_VHADD_qr_m<MVE_v16u8>;
5342 defm MVE_VHADD_qr_u16 : MVE_VHADD_qr_m<MVE_v8u16>;
5343 defm MVE_VHADD_qr_u32 : MVE_VHADD_qr_m<MVE_v4u32>;
5345 defm MVE_VHSUB_qr_s8 : MVE_VHSUB_qr_m<MVE_v16s8>;
5346 defm MVE_VHSUB_qr_s16 : MVE_VHSUB_qr_m<MVE_v8s16>;
5347 defm MVE_VHSUB_qr_s32 : MVE_VHSUB_qr_m<MVE_v4s32>;
5348 defm MVE_VHSUB_qr_u8 : MVE_VHSUB_qr_m<MVE_v16u8>;
5349 defm MVE_VHSUB_qr_u16 : MVE_VHSUB_qr_m<MVE_v8u16>;
5350 defm MVE_VHSUB_qr_u32 : MVE_VHSUB_qr_m<MVE_v4u32>;
// Float add/sub by scalar. Reuses MVE_VxADDSUB_qr: bits_21_20 fixed at
// 0b11 and VTI.Size{0} distinguishing f16 from f32 in Inst{28}.
5352 multiclass MVE_VADDSUB_qr_f<string iname, MVEVectorVTInfo VTI, bit subtract,
5353 SDNode Op, Intrinsic PredInt> {
5354 def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, subtract>;
5355 defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? ),
5356 !cast<Instruction>(NAME)>;
5359 let Predicates = [HasMVEFloat] in {
5360 defm MVE_VADD_qr_f32 : MVE_VADDSUB_qr_f<"vadd", MVE_v4f32, 0b0, fadd,
5361 int_arm_mve_add_predicated>;
5362 defm MVE_VADD_qr_f16 : MVE_VADDSUB_qr_f<"vadd", MVE_v8f16, 0b0, fadd,
5363 int_arm_mve_add_predicated>;
5365 defm MVE_VSUB_qr_f32 : MVE_VADDSUB_qr_f<"vsub", MVE_v4f32, 0b1, fsub,
5366 int_arm_mve_sub_predicated>;
5367 defm MVE_VSUB_qr_f16 : MVE_VADDSUB_qr_f<"vsub", MVE_v8f16, 0b1, fsub,
5368 int_arm_mve_sub_predicated>;
// Shift-by-scalar encoding class covering VSHL/VRSHL/VQSHL/VQRSHL (see
// MVE_VxSHL_qr_types below: bit_7 = saturating, bit_17 = rounding).
// Uses the single-vector-operand base since the shifted vector is tied
// to the destination.
5371 class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size,
5372 bit bit_7, bit bit_17, list<dag> pattern=[]>
5373 : MVE_qDest_single_rSrc<iname, suffix, pattern> {
5376 let Inst{25-23} = 0b100;
5377 let Inst{21-20} = 0b11;
5378 let Inst{19-18} = size;
5379 let Inst{17} = bit_17;
5381 let Inst{12-8} = 0b11110;
5382 let Inst{7} = bit_7;
5383 let Inst{6-4} = 0b110;
5384 let validForTailPredication = 1;
// One shift-by-scalar instruction plus its ISel patterns. The q/r bits
// (saturate/round) are baked into the int_arm_mve_vshl_scalar intrinsic
// calls as immediate operands, so each of the four mnemonics gets its
// own pattern pair.
5387 multiclass MVE_VxSHL_qr_p<string iname, MVEVectorVTInfo VTI, bit q, bit r> {
5388 def "" : MVE_VxSHL_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, q, r>;
5389 defvar Inst = !cast<Instruction>(NAME);
5391 def : Pat<(VTI.Vec (int_arm_mve_vshl_scalar
5392 (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
5393 (i32 q), (i32 r), (i32 VTI.Unsigned))),
5394 (VTI.Vec (Inst (VTI.Vec MQPR:$in), (i32 rGPR:$sh)))>;
5396 def : Pat<(VTI.Vec (int_arm_mve_vshl_scalar_predicated
5397 (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
5398 (i32 q), (i32 r), (i32 VTI.Unsigned),
5399 (VTI.Pred VCCR:$mask))),
5400 (VTI.Vec (Inst (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
5401 ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
// All six signed/unsigned lane-size variants of one shift mnemonic.
5404 multiclass MVE_VxSHL_qr_types<string iname, bit bit_7, bit bit_17> {
5405 defm s8 : MVE_VxSHL_qr_p<iname, MVE_v16s8, bit_7, bit_17>;
5406 defm s16 : MVE_VxSHL_qr_p<iname, MVE_v8s16, bit_7, bit_17>;
5407 defm s32 : MVE_VxSHL_qr_p<iname, MVE_v4s32, bit_7, bit_17>;
5408 defm u8 : MVE_VxSHL_qr_p<iname, MVE_v16u8, bit_7, bit_17>;
5409 defm u16 : MVE_VxSHL_qr_p<iname, MVE_v8u16, bit_7, bit_17>;
5410 defm u32 : MVE_VxSHL_qr_p<iname, MVE_v4u32, bit_7, bit_17>;
// bit_7 = saturating (VQ*), bit_17 = rounding (V*R*).
5413 defm MVE_VSHL_qr : MVE_VxSHL_qr_types<"vshl", 0b0, 0b0>;
5414 defm MVE_VRSHL_qr : MVE_VxSHL_qr_types<"vrshl", 0b0, 0b1>;
5415 defm MVE_VQSHL_qr : MVE_VxSHL_qr_types<"vqshl", 0b1, 0b0>;
5416 defm MVE_VQRSHL_qr : MVE_VxSHL_qr_types<"vqrshl", 0b1, 0b1>;
// Generic-shift patterns: an ARMvshlu/ARMvshls of a vector by a
// splatted GPR selects to the plain (non-saturating, non-rounding)
// VSHL_qr; the u/s variants differ only in which instruction suffix
// (u*/s*) is chosen.
5418 let Predicates = [HasMVEInt] in {
5419 def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm), (v4i32 (ARMvdup rGPR:$Rm)))),
5420 (v4i32 (MVE_VSHL_qru32 (v4i32 MQPR:$Qm), rGPR:$Rm))>;
5421 def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 (ARMvdup rGPR:$Rm)))),
5422 (v8i16 (MVE_VSHL_qru16 (v8i16 MQPR:$Qm), rGPR:$Rm))>;
5423 def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 (ARMvdup rGPR:$Rm)))),
5424 (v16i8 (MVE_VSHL_qru8 (v16i8 MQPR:$Qm), rGPR:$Rm))>;
5426 def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 (ARMvdup rGPR:$Rm)))),
5427 (v4i32 (MVE_VSHL_qrs32 (v4i32 MQPR:$Qm), rGPR:$Rm))>;
5428 def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 (ARMvdup rGPR:$Rm)))),
5429 (v8i16 (MVE_VSHL_qrs16 (v8i16 MQPR:$Qm), rGPR:$Rm))>;
5430 def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 (ARMvdup rGPR:$Rm)))),
5431 (v16i8 (MVE_VSHL_qrs8 (v16i8 MQPR:$Qm), rGPR:$Rm))>;
// VBRSR: bit-reverse each lane (width given by the scalar Rm) — one
// instruction per lane size, all sharing this encoding class.
5434 class MVE_VBRSR<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
5435 : MVE_qDest_rSrc<iname, suffix, "", pattern> {
5438 let Inst{21-20} = size;
5443 let validForTailPredication = 1;
5446 def MVE_VBRSR8 : MVE_VBRSR<"vbrsr", "8", 0b00>;
5447 def MVE_VBRSR16 : MVE_VBRSR<"vbrsr", "16", 0b01>;
5448 def MVE_VBRSR32 : MVE_VBRSR<"vbrsr", "32", 0b10>;
// Intrinsic-based patterns for VBRSR, unpredicated and predicated.
5450 multiclass MVE_VBRSR_pat_m<MVEVectorVTInfo VTI, Instruction Inst> {
5452 def : Pat<(VTI.Vec (int_arm_mve_vbrsr (VTI.Vec MQPR:$Qn), (i32 rGPR:$Rm))),
5453 (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (i32 rGPR:$Rm)))>;
5455 def : Pat<(VTI.Vec (int_arm_mve_vbrsr_predicated
5456 (VTI.Vec MQPR:$inactive),
5457 (VTI.Vec MQPR:$Qn), (i32 rGPR:$Rm),
5458 (VTI.Pred VCCR:$mask))),
5459 (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (i32 rGPR:$Rm),
5460 ARMVCCThen, (VTI.Pred VCCR:$mask),
5461 (VTI.Vec MQPR:$inactive)))>;
// Generic bitreverse maps to VBRSR with the scalar operand equal to the
// full lane width (materialized via t2MOVi).
5464 let Predicates = [HasMVEInt] in {
5465 def : Pat<(v16i8 ( bitreverse (v16i8 MQPR:$val1))),
5466 (v16i8 ( MVE_VBRSR8 (v16i8 MQPR:$val1), (t2MOVi (i32 8)) ))>;
5468 def : Pat<(v4i32 ( bitreverse (v4i32 MQPR:$val1))),
5469 (v4i32 ( MVE_VBRSR32 (v4i32 MQPR:$val1), (t2MOVi (i32 32)) ))>;
5471 def : Pat<(v8i16 ( bitreverse (v8i16 MQPR:$val1))),
5472 (v8i16 ( MVE_VBRSR16 (v8i16 MQPR:$val1), (t2MOVi (i32 16)) ))>;
5474 defm : MVE_VBRSR_pat_m<MVE_v16i8, MVE_VBRSR8>;
5475 defm : MVE_VBRSR_pat_m<MVE_v8i16, MVE_VBRSR16>;
5476 defm : MVE_VBRSR_pat_m<MVE_v4i32, MVE_VBRSR32>;
// Float vectors reuse the same integer instructions: only the lane
// width matters for a bit reversal.
5479 let Predicates = [HasMVEFloat] in {
5480 defm : MVE_VBRSR_pat_m<MVE_v8f16, MVE_VBRSR16>;
5481 defm : MVE_VBRSR_pat_m<MVE_v4f32, MVE_VBRSR32>;
// Integer vector-by-scalar multiply. ARMimmOneV is the multiplicative
// identity passed to the predicated pattern helper.
5484 class MVE_VMUL_qr_int<string iname, string suffix, bits<2> size>
5485 : MVE_qDest_rSrc<iname, suffix, ""> {
5488 let Inst{21-20} = size;
5493 let validForTailPredication = 1;
5496 multiclass MVE_VMUL_qr_int_m<MVEVectorVTInfo VTI> {
5497 def "" : MVE_VMUL_qr_int<"vmul", VTI.Suffix, VTI.Size>;
5498 let Predicates = [HasMVEInt] in {
5499 defm : MVE_TwoOpPatternDup<VTI, mul, int_arm_mve_mul_predicated, (? ),
5500 !cast<Instruction>(NAME), ARMimmOneV>;
5504 defm MVE_VMUL_qr_i8 : MVE_VMUL_qr_int_m<MVE_v16i8>;
5505 defm MVE_VMUL_qr_i16 : MVE_VMUL_qr_int_m<MVE_v8i16>;
5506 defm MVE_VMUL_qr_i32 : MVE_VMUL_qr_int_m<MVE_v4i32>;
// Encoding class shared by the saturating doubling multiplies
// (VQDMULH/VQRDMULH, bit_28 distinguishing them) and the float VMUL —
// see the multiclasses below.
5508 class MVE_VxxMUL_qr<string iname, string suffix,
5509 bit bit_28, bits<2> bits_21_20, list<dag> pattern=[]>
5510 : MVE_qDest_rSrc<iname, suffix, "", pattern> {
5512 let Inst{28} = bit_28;
5513 let Inst{21-20} = bits_21_20;
5518 let validForTailPredication = 1;
// Instruction plus both the PatFrag-based dup patterns and the
// intrinsic-based vector-scalar patterns.
5521 multiclass MVE_VxxMUL_qr_m<string iname, MVEVectorVTInfo VTI, bit bit_28,
5522 PatFrag Op, Intrinsic int_unpred, Intrinsic int_pred> {
5523 def "" : MVE_VxxMUL_qr<iname, VTI.Suffix, bit_28, VTI.Size>;
5525 let Predicates = [HasMVEInt] in {
5526 defm : MVE_TwoOpPatternDup<VTI, Op, int_pred, (? ), !cast<Instruction>(NAME)>;
5528 defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME), VTI, int_unpred, int_pred>;
// VQRDMULH has no generic DAG node here, so it passes null_frag and
// relies only on the intrinsic patterns.
5531 multiclass MVE_VQDMULH_qr_m<MVEVectorVTInfo VTI> :
5532 MVE_VxxMUL_qr_m<"vqdmulh", VTI, 0b0, MVEvqdmulh,
5533 int_arm_mve_vqdmulh, int_arm_mve_qdmulh_predicated>;
5535 multiclass MVE_VQRDMULH_qr_m<MVEVectorVTInfo VTI> :
5536 MVE_VxxMUL_qr_m<"vqrdmulh", VTI, 0b1, null_frag,
5537 int_arm_mve_vqrdmulh, int_arm_mve_qrdmulh_predicated>;
5539 defm MVE_VQDMULH_qr_s8 : MVE_VQDMULH_qr_m<MVE_v16s8>;
5540 defm MVE_VQDMULH_qr_s16 : MVE_VQDMULH_qr_m<MVE_v8s16>;
5541 defm MVE_VQDMULH_qr_s32 : MVE_VQDMULH_qr_m<MVE_v4s32>;
5543 defm MVE_VQRDMULH_qr_s8 : MVE_VQRDMULH_qr_m<MVE_v16s8>;
5544 defm MVE_VQRDMULH_qr_s16 : MVE_VQRDMULH_qr_m<MVE_v8s16>;
5545 defm MVE_VQRDMULH_qr_s32 : MVE_VQRDMULH_qr_m<MVE_v4s32>;
// Float VMUL by scalar, reusing the same encoding class with
// bits_21_20 = 0b11 and size bit in Inst{28}.
5547 multiclass MVE_VxxMUL_qr_f_m<MVEVectorVTInfo VTI> {
5548 let validForTailPredication = 1 in
5549 def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11>;
5550 defm : MVE_TwoOpPatternDup<VTI, fmul, int_arm_mve_mul_predicated, (? ),
5551 !cast<Instruction>(NAME)>;
5554 let Predicates = [HasMVEFloat] in {
5555 defm MVE_VMUL_qr_f16 : MVE_VxxMUL_qr_f_m<MVE_v8f16>;
5556 defm MVE_VMUL_qr_f32 : MVE_VxxMUL_qr_f_m<MVE_v4f32>;
// Shared encoding class for the multiply-accumulate-by-scalar family
// (integer VMLA/VMLAS and float VFMA/VFMAS): destination is also an
// accumulator input, hence the qDestSrc base.
5559 class MVE_VFMAMLA_qr<string iname, string suffix,
5560 bit bit_28, bits<2> bits_21_20, bit S,
5561 list<dag> pattern=[]>
5562 : MVE_qDestSrc_rSrc<iname, suffix, pattern> {
5564 let Inst{28} = bit_28;
5565 let Inst{21-20} = bits_21_20;
5570 let validForTailPredication = 1;
5571 let hasSideEffects = 0;
// Integer VMLA (scalar_addend=0: acc + v2*scalar) / VMLAS (scalar_addend=1:
// v1*v2 + splat(scalar)). The add/mul DAG patterns are only generated for
// the unsigned records, per the determinism comment below.
5574 multiclass MVE_VMLA_qr_multi<string iname, MVEVectorVTInfo VTI,
5575 bit scalar_addend> {
5576 def "": MVE_VFMAMLA_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size,
5578 defvar Inst = !cast<Instruction>(NAME);
5579 defvar pred_int = !cast<Intrinsic>("int_arm_mve_" # iname # "_n_predicated");
// Shorthand dags reused by the patterns below.
5580 defvar v1 = (VTI.Vec MQPR:$v1);
5581 defvar v2 = (VTI.Vec MQPR:$v2);
5582 defvar vs = (VTI.Vec (ARMvdup rGPR:$s));
5583 defvar s = (i32 rGPR:$s);
5584 defvar pred = (VTI.Pred VCCR:$pred);
5586 // The signed and unsigned variants of this instruction have different
5587 // encodings, but they're functionally identical. For the sake of
5588 // determinism, we generate only the unsigned variant.
5589 if VTI.Unsigned then let Predicates = [HasMVEInt] in {
5590 if scalar_addend then {
5591 def : Pat<(VTI.Vec (add (mul v1, v2), vs)),
5592 (VTI.Vec (Inst v1, v2, s))>;
5594 def : Pat<(VTI.Vec (add (mul v2, vs), v1)),
5595 (VTI.Vec (Inst v1, v2, s))>;
// Predicated form via the *_n_predicated intrinsic (both variants).
5598 def : Pat<(VTI.Vec (pred_int v1, v2, s, pred)),
5599 (VTI.Vec (Inst v1, v2, s, ARMVCCThen, pred))>;
5603 defm MVE_VMLA_qr_s8 : MVE_VMLA_qr_multi<"vmla", MVE_v16s8, 0b0>;
5604 defm MVE_VMLA_qr_s16 : MVE_VMLA_qr_multi<"vmla", MVE_v8s16, 0b0>;
5605 defm MVE_VMLA_qr_s32 : MVE_VMLA_qr_multi<"vmla", MVE_v4s32, 0b0>;
5606 defm MVE_VMLA_qr_u8 : MVE_VMLA_qr_multi<"vmla", MVE_v16u8, 0b0>;
5607 defm MVE_VMLA_qr_u16 : MVE_VMLA_qr_multi<"vmla", MVE_v8u16, 0b0>;
5608 defm MVE_VMLA_qr_u32 : MVE_VMLA_qr_multi<"vmla", MVE_v4u32, 0b0>;
5610 defm MVE_VMLAS_qr_s8 : MVE_VMLA_qr_multi<"vmlas", MVE_v16s8, 0b1>;
5611 defm MVE_VMLAS_qr_s16 : MVE_VMLA_qr_multi<"vmlas", MVE_v8s16, 0b1>;
5612 defm MVE_VMLAS_qr_s32 : MVE_VMLA_qr_multi<"vmlas", MVE_v4s32, 0b1>;
5613 defm MVE_VMLAS_qr_u8 : MVE_VMLA_qr_multi<"vmlas", MVE_v16u8, 0b1>;
5614 defm MVE_VMLAS_qr_u16 : MVE_VMLA_qr_multi<"vmlas", MVE_v8u16, 0b1>;
5615 defm MVE_VMLAS_qr_u32 : MVE_VMLA_qr_multi<"vmlas", MVE_v4u32, 0b1>;
// Float VFMA/VFMAS by scalar. With scalar_addend the splatted scalar is
// the addend (fma v1, v2, vs); without it, the scalar is one multiplicand
// and the fma is commuted as needed so the accumulator ends up in the
// tied $Qd operand — note the operand swap (Inst v2, v1, is) in those
// patterns. vselect patterns cover predication of the plain fma form.
5617 multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI,
5618 bit scalar_addend> {
5619 def "": MVE_VFMAMLA_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, scalar_addend>;
5620 defvar Inst = !cast<Instruction>(NAME);
5621 defvar pred_int = int_arm_mve_fma_predicated;
5622 defvar v1 = (VTI.Vec MQPR:$v1);
5623 defvar v2 = (VTI.Vec MQPR:$v2);
5624 defvar vs = (VTI.Vec (ARMvdup (i32 rGPR:$s)));
5625 defvar is = (i32 rGPR:$s);
5626 defvar pred = (VTI.Pred VCCR:$pred);
5628 let Predicates = [HasMVEFloat] in {
5629 if scalar_addend then {
5630 def : Pat<(VTI.Vec (fma v1, v2, vs)),
5631 (VTI.Vec (Inst v1, v2, is))>;
5632 def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
5633 (VTI.Vec (fma v1, v2, vs)),
5635 (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
5636 def : Pat<(VTI.Vec (pred_int v1, v2, vs, pred)),
5637 (VTI.Vec (Inst v1, v2, is, ARMVCCThen, pred))>;
5639 def : Pat<(VTI.Vec (fma v1, vs, v2)),
5640 (VTI.Vec (Inst v2, v1, is))>;
5641 def : Pat<(VTI.Vec (fma vs, v1, v2)),
5642 (VTI.Vec (Inst v2, v1, is))>;
5643 def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
5644 (VTI.Vec (fma vs, v2, v1)),
5646 (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
5647 def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
5648 (VTI.Vec (fma v2, vs, v1)),
5650 (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
5651 def : Pat<(VTI.Vec (pred_int v1, vs, v2, pred)),
5652 (VTI.Vec (Inst v2, v1, is, ARMVCCThen, pred))>;
5653 def : Pat<(VTI.Vec (pred_int vs, v1, v2, pred)),
5654 (VTI.Vec (Inst v2, v1, is, ARMVCCThen, pred))>;
5659 let Predicates = [HasMVEFloat] in {
5660 defm MVE_VFMA_qr_f16 : MVE_VFMA_qr_multi<"vfma", MVE_v8f16, 0>;
5661 defm MVE_VFMA_qr_f32 : MVE_VFMA_qr_multi<"vfma", MVE_v4f32, 0>;
5662 defm MVE_VFMA_qr_Sf16 : MVE_VFMA_qr_multi<"vfmas", MVE_v8f16, 1>;
5663 defm MVE_VFMA_qr_Sf32 : MVE_VFMA_qr_multi<"vfmas", MVE_v4f32, 1>;
// Encoding class for the saturating doubling multiply-accumulate family
// (VQDMLAH/VQRDMLAH/VQDMLASH/VQRDMLASH — see the instantiations below,
// where bit_5 and bit_12 select among the four mnemonics).
5666 class MVE_VQDMLAH_qr<string iname, string suffix, bit U, bits<2> size,
5667 bit bit_5, bit bit_12, list<dag> pattern=[]>
5668 : MVE_qDestSrc_rSrc<iname, suffix, pattern> {
5671 let Inst{21-20} = size;
5673 let Inst{12} = bit_12;
5675 let Inst{5} = bit_5;
// Instruction plus intrinsic patterns; the intrinsic names are derived
// from the mnemonic ("int_arm_mve_<iname>[_predicated]") via !cast.
// Only the U=0 record is generated (signed-only operations).
5678 multiclass MVE_VQDMLAH_qr_multi<string iname, MVEVectorVTInfo VTI,
5679 bit bit_5, bit bit_12> {
5680 def "": MVE_VQDMLAH_qr<iname, VTI.Suffix, 0b0, VTI.Size, bit_5, bit_12>;
5681 defvar Inst = !cast<Instruction>(NAME);
5682 defvar unpred_int = !cast<Intrinsic>("int_arm_mve_" # iname);
5683 defvar pred_int = !cast<Intrinsic>("int_arm_mve_" # iname # "_predicated");
5685 let Predicates = [HasMVEInt] in {
5686 def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$v1), (VTI.Vec MQPR:$v2),
5688 (VTI.Vec (Inst (VTI.Vec MQPR:$v1), (VTI.Vec MQPR:$v2),
5690 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v1), (VTI.Vec MQPR:$v2),
5691 (i32 rGPR:$s), (VTI.Pred VCCR:$pred))),
5692 (VTI.Vec (Inst (VTI.Vec MQPR:$v1), (VTI.Vec MQPR:$v2),
5693 (i32 rGPR:$s), ARMVCCThen,
5694 (VTI.Pred VCCR:$pred)))>;
// Signed lane-size variants only.
5698 multiclass MVE_VQDMLAH_qr_types<string iname, bit bit_5, bit bit_12> {
5699 defm s8 : MVE_VQDMLAH_qr_multi<iname, MVE_v16s8, bit_5, bit_12>;
5700 defm s16 : MVE_VQDMLAH_qr_multi<iname, MVE_v8s16, bit_5, bit_12>;
5701 defm s32 : MVE_VQDMLAH_qr_multi<iname, MVE_v4s32, bit_5, bit_12>;
5704 defm MVE_VQDMLAH_qr : MVE_VQDMLAH_qr_types<"vqdmlah", 0b1, 0b0>;
5705 defm MVE_VQRDMLAH_qr : MVE_VQDMLAH_qr_types<"vqrdmlah", 0b0, 0b0>;
5706 defm MVE_VQDMLASH_qr : MVE_VQDMLAH_qr_types<"vqdmlash", 0b1, 0b1>;
5707 defm MVE_VQRDMLASH_qr : MVE_VQDMLAH_qr_types<"vqrdmlash", 0b0, 0b1>;
// VIDUP/VDDUP: incrementing/decrementing vector duplicate. Produces both
// the vector $Qd and the updated scalar $Rn (tied to $Rn_src); bit_12
// selects decrement. Only VIDUP has a selection DAG node (ARMvidup);
// VDDUP passes null_frag below.
5709 class MVE_VxDUP<string iname, string suffix, bits<2> size, bit bit_12,
5710 ValueType VT, SDPatternOperator vxdup>
5711 : MVE_p<(outs MQPR:$Qd, tGPREven:$Rn),
5712 (ins tGPREven:$Rn_src, MVE_VIDUP_imm:$imm), NoItinerary,
5713 iname, suffix, "$Qd, $Rn, $imm", vpred_r, "$Rn = $Rn_src",
5714 [(set (VT MQPR:$Qd), (i32 tGPREven:$Rn),
5715 (vxdup (i32 tGPREven:$Rn_src), (i32 imm:$imm)))]> {
5721 let Inst{25-23} = 0b100;
5722 let Inst{22} = Qd{3};
5723 let Inst{21-20} = size;
// Only Rn{3-1} is encoded: the register must be even (tGPREven class).
5724 let Inst{19-17} = Rn{3-1};
5726 let Inst{15-13} = Qd{2-0};
5727 let Inst{12} = bit_12;
5728 let Inst{11-8} = 0b1111;
// The 2-bit step immediate is split across Inst{7} and Inst{0}.
5729 let Inst{7} = imm{1};
5730 let Inst{6-1} = 0b110111;
5731 let Inst{0} = imm{0};
5732 let validForTailPredication = 1;
5733 let hasSideEffects = 0;
5736 def MVE_VIDUPu8 : MVE_VxDUP<"vidup", "u8", 0b00, 0b0, v16i8, ARMvidup>;
5737 def MVE_VIDUPu16 : MVE_VxDUP<"vidup", "u16", 0b01, 0b0, v8i16, ARMvidup>;
5738 def MVE_VIDUPu32 : MVE_VxDUP<"vidup", "u32", 0b10, 0b0, v4i32, ARMvidup>;
5740 def MVE_VDDUPu8 : MVE_VxDUP<"vddup", "u8", 0b00, 0b1, v16i8, null_frag>;
5741 def MVE_VDDUPu16 : MVE_VxDUP<"vddup", "u16", 0b01, 0b1, v8i16, null_frag>;
5742 def MVE_VDDUPu32 : MVE_VxDUP<"vddup", "u32", 0b10, 0b1, v4i32, null_frag>;
// VIWDUP/VDWDUP: wrapping variants of VIDUP/VDDUP with an extra odd-GPR
// wrap-limit operand $Rm (encoded in Inst{3-1}); bit_12 again selects
// the decrementing form. No ISel patterns here.
5744 class MVE_VxWDUP<string iname, string suffix, bits<2> size, bit bit_12,
5745 list<dag> pattern=[]>
5746 : MVE_p<(outs MQPR:$Qd, tGPREven:$Rn),
5747 (ins tGPREven:$Rn_src, tGPROdd:$Rm, MVE_VIDUP_imm:$imm), NoItinerary,
5748 iname, suffix, "$Qd, $Rn, $Rm, $imm", vpred_r, "$Rn = $Rn_src",
5756 let Inst{25-23} = 0b100;
5757 let Inst{22} = Qd{3};
5758 let Inst{21-20} = size;
5759 let Inst{19-17} = Rn{3-1};
5761 let Inst{15-13} = Qd{2-0};
5762 let Inst{12} = bit_12;
5763 let Inst{11-8} = 0b1111;
5764 let Inst{7} = imm{1};
5765 let Inst{6-4} = 0b110;
// Rm must be odd (tGPROdd), so only bits {3-1} are encoded.
5766 let Inst{3-1} = Rm{3-1};
5767 let Inst{0} = imm{0};
5768 let validForTailPredication = 1;
5769 let hasSideEffects = 0;
5772 def MVE_VIWDUPu8 : MVE_VxWDUP<"viwdup", "u8", 0b00, 0b0>;
5773 def MVE_VIWDUPu16 : MVE_VxWDUP<"viwdup", "u16", 0b01, 0b0>;
5774 def MVE_VIWDUPu32 : MVE_VxWDUP<"viwdup", "u32", 0b10, 0b0>;
5776 def MVE_VDWDUPu8 : MVE_VxWDUP<"vdwdup", "u8", 0b00, 0b1>;
5777 def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>;
5778 def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>;
// VCTP: create a tail predicate in P0 from an element count in Rn.
// Rematerializable: it has no side effects and depends only on Rn.
5780 let isReMaterializable = 1 in
5781 class MVE_VCTPInst<string suffix, bits<2> size, list<dag> pattern=[]>
5782 : MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix,
5783 "$Rn", vpred_n, "", pattern> {
5786 let Inst{28-27} = 0b10;
5787 let Inst{26-22} = 0b00000;
5788 let Inst{21-20} = size;
5789 let Inst{19-16} = Rn{3-0};
5790 let Inst{15-11} = 0b11101;
5791 let Inst{10-0} = 0b00000000001;
5792 let Unpredictable{10-0} = 0b11111111111;
5794 let Constraints = "";
5795 let DecoderMethod = "DecodeMveVCTP";
5796 let validForTailPredication = 1;
// Patterns: the bare intrinsic, plus an AND with an existing mask which
// folds into a predicated (VPT-then) VCTP.
5799 multiclass MVE_VCTP<MVEVectorVTInfo VTI, Intrinsic intr> {
5800 def "": MVE_VCTPInst<VTI.BitsSuffix, VTI.Size>;
5801 defvar Inst = !cast<Instruction>(NAME);
5803 let Predicates = [HasMVEInt] in {
5804 def : Pat<(intr rGPR:$Rn),
5805 (VTI.Pred (Inst rGPR:$Rn))>;
5806 def : Pat<(and (intr rGPR:$Rn), (VTI.Pred VCCR:$mask)),
5807 (VTI.Pred (Inst rGPR:$Rn, ARMVCCThen, VCCR:$mask))>;
5811 defm MVE_VCTP8 : MVE_VCTP<MVE_v16i8, int_arm_mve_vctp8>;
5812 defm MVE_VCTP16 : MVE_VCTP<MVE_v8i16, int_arm_mve_vctp16>;
5813 defm MVE_VCTP32 : MVE_VCTP<MVE_v4i32, int_arm_mve_vctp32>;
5814 defm MVE_VCTP64 : MVE_VCTP<MVE_v2i64, int_arm_mve_vctp64>;
5816 // end of mve_qDest_rSrc
5818 // start of coproc mov
// Two-GPR <-> two-Q-register-lane move (VMOV q[idx], q[idx2], Rt, Rt2 and
// the reverse). to_qreg (Inst{20}) selects the transfer direction; the
// two lane indices are appended to the caller's input operand list.
5820 class MVE_VMOV_64bit<dag oops, dag iops, bit to_qreg, string ops, string cstr>
5821 : MVE_VMOV_lane_base<oops, !con(iops, (ins MVEPairVectorIndex2:$idx,
5822 MVEPairVectorIndex0:$idx2)),
5823 NoItinerary, "vmov", "", ops, cstr, []> {
5830 let Inst{31-23} = 0b111011000;
5831 let Inst{22} = Qd{3};
5833 let Inst{20} = to_qreg;
5834 let Inst{19-16} = Rt2{3-0};
5835 let Inst{15-13} = Qd{2-0};
5836 let Inst{12-5} = 0b01111000;
5838 let Inst{3-0} = Rt{3-0};
5840 let hasSideEffects = 0;
5843 // The assembly syntax for these instructions mentions the vector
5844 // register name twice, e.g.
5846 // vmov q2[2], q2[0], r0, r1
5847 // vmov r0, r1, q2[2], q2[0]
5849 // which needs a bit of juggling with MC operand handling.
5851 // For the move _into_ a vector register, the MC operand list also has
5852 // to mention the register name twice: once as the output, and once as
5853 // an extra input to represent where the unchanged half of the output
5854 // register comes from (when this instruction is used in code
5855 // generation). So we arrange that the first mention of the vector reg
5856 // in the instruction is considered by the AsmMatcher to be the output
5857 // ($Qd), and the second one is the input ($QdSrc). Binding them
5858 // together with the existing 'tie' constraint is enough to enforce at
5859 // register allocation time that they have to be the same register.
5861 // For the move _from_ a vector register, there's no way to get round
5862 // the fact that both instances of that register name have to be
5863 // inputs. They have to be the same register again, but this time, we
5864 // can't use a tie constraint, because that has to be between an
5865 // output and an input operand. So this time, we have to arrange that
5866 // the q-reg appears just once in the MC operand list, in spite of
5867 // being mentioned twice in the asm syntax - which needs a custom
5868 // AsmMatchConverter.
// GPR-pair -> Q-register lanes. $QdSrc supplies the unchanged half of the
// output register (see the long comment above these defs).
5870 def MVE_VMOV_q_rr : MVE_VMOV_64bit<(outs MQPR:$Qd),
5871 (ins MQPR:$QdSrc, rGPR:$Rt, rGPR:$Rt2),
5872 0b1, "$Qd$idx, $QdSrc$idx2, $Rt, $Rt2",
5874 let DecoderMethod = "DecodeMVEVMOVDRegtoQ";
// Q-register lanes -> GPR pair; needs a custom AsmMatchConverter because
// the asm mentions the same Q register twice but the MC operand list
// holds it only once.
5877 def MVE_VMOV_rr_q : MVE_VMOV_64bit<(outs rGPR:$Rt, rGPR:$Rt2), (ins MQPR:$Qd),
5878 0b0, "$Rt, $Rt2, $Qd$idx, $Qd$idx2", ""> {
5879 let DecoderMethod = "DecodeMVEVMOVQtoDReg";
5880 let AsmMatchConverter = "cvtMVEVMOVQtoDReg";
5883 let Predicates = [HasMVEInt] in {
5884 // Double lane moves. There are a number of patterns here. We know that the
5885 // insertelt's will be in descending order by index, and need to match the 5
5886 // patterns that might contain 2-0 or 3-1 pairs. These are:
5887 // 3 2 1 0 -> vmovqrr 31; vmovqrr 20
5888 // 3 2 1 -> vmovqrr 31; vmov 2
5889 // 3 1 -> vmovqrr 31
5890 // 2 1 0 -> vmovqrr 20; vmov 1
5891 // 2 0 -> vmovqrr 20
5892 // The other potential patterns will be handled by single lane inserts.
// Case "3 2 1 0": two double-lane moves (20 then 31).
5893 def : Pat<(insertelt (insertelt (insertelt (insertelt (v4i32 MQPR:$src1),
5894 rGPR:$srcA, (i32 0)),
5895 rGPR:$srcB, (i32 1)),
5896 rGPR:$srcC, (i32 2)),
5897 rGPR:$srcD, (i32 3)),
5898 (MVE_VMOV_q_rr (MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcA, rGPR:$srcC, (i32 2), (i32 0)),
5899 rGPR:$srcB, rGPR:$srcD, (i32 3), (i32 1))>;
// Case "3 2 1": single-lane insert of 2, then a 31 double-lane move.
5900 def : Pat<(insertelt (insertelt (insertelt (v4i32 MQPR:$src1),
5901 rGPR:$srcB, (i32 1)),
5902 rGPR:$srcC, (i32 2)),
5903 rGPR:$srcD, (i32 3)),
5904 (MVE_VMOV_q_rr (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$srcC, (i32 2)),
5905 rGPR:$srcB, rGPR:$srcD, (i32 3), (i32 1))>;
// Case "3 1": one 31 double-lane move.
5906 def : Pat<(insertelt (insertelt (v4i32 MQPR:$src1), rGPR:$srcA, (i32 1)), rGPR:$srcB, (i32 3)),
5907 (MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcA, rGPR:$srcB, (i32 3), (i32 1))>;
// Case "2 1 0": single-lane insert of 1, then a 20 double-lane move.
5908 def : Pat<(insertelt (insertelt (insertelt (v4i32 MQPR:$src1),
5909 rGPR:$srcB, (i32 0)),
5910 rGPR:$srcC, (i32 1)),
5911 rGPR:$srcD, (i32 2)),
5912 (MVE_VMOV_q_rr (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$srcC, (i32 1)),
5913 rGPR:$srcB, rGPR:$srcD, (i32 2), (i32 0))>;
// Case "2 0": one 20 double-lane move.
5914 def : Pat<(insertelt (insertelt (v4i32 MQPR:$src1), rGPR:$srcA, (i32 0)), rGPR:$srcB, (i32 2)),
5915 (MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcA, rGPR:$srcB, (i32 2), (i32 0))>;
5918 // end of coproc mov
5920 // start of MVE interleaving load/store
5922 // Base class for the family of interleaving/deinterleaving
5923 // load/stores with names like VLD20.8 and VST43.32.
5924 class MVE_vldst24_base<bit writeback, bit fourregs, bits<2> stage, bits<2> size,
5925 bit load, dag Oops, dag loadIops, dag wbIops,
5926 string iname, string ops,
5927 string cstr, list<dag> pattern=[]>
5928 : MVE_MI<Oops, !con(loadIops, wbIops), NoItinerary, iname, ops, cstr, pattern> {
5932 let Inst{31-22} = 0b1111110010;
5933 let Inst{21} = writeback;
5934 let Inst{20} = load;
5935 let Inst{19-16} = Rn;
5936 let Inst{15-13} = VQd{2-0};
5937 let Inst{12-9} = 0b1111;
5938 let Inst{8-7} = size;
5939 let Inst{6-5} = stage;
5940 let Inst{4-1} = 0b0000;
5941 let Inst{0} = fourregs;
5944 let mayStore = !eq(load,0);
5945 let hasSideEffects = 0;
5946 let validForTailPredication = load;
5949 // A parameter class used to encapsulate all the ways the writeback
5950 // variants of VLD20 and friends differ from the non-writeback ones.
5951 class MVE_vldst24_writeback<bit b, dag Oo, dag Io,
5952 string sy="", string c="", string n=""> {
// id_suffix is appended to the def name (e.g. "_wb" for writeback).
5958 string id_suffix = n;
5961 // Another parameter class that encapsulates the differences between VLD2x
// n = number of vectors, s = list of valid stage numbers, vl = the
// matching VecList register operand.
5963 class MVE_vldst24_nvecs<int n, list<int> s, bit b, RegisterOperand vl> {
5965 list<int> stages = s;
5967 RegisterOperand VecList = vl;
5970 // A third parameter class that distinguishes VLDnn.8 from .16 from .32.
5971 class MVE_vldst24_lanesize<int i, bits<2> b> {
5973 bits<2> sizebits = b;
5976 // A base class for each direction of transfer: one for load, one for
5977 // store. I can't make these a fourth independent parametric tuple
5978 // class, because they have to take the nvecs tuple class as a
5979 // parameter, in order to find the right VecList operand type.
// Load direction: the vector list is an output (plus tied $VQdSrc input
// so unwritten lanes are modeled).
5981 class MVE_vld24_base<MVE_vldst24_nvecs n, bits<2> pat, bits<2> size,
5982 MVE_vldst24_writeback wb, string iname,
5983 list<dag> pattern=[]>
5984 : MVE_vldst24_base<wb.writeback, n.bit0, pat, size, 1,
5985 !con((outs n.VecList:$VQd), wb.Oops),
5986 (ins n.VecList:$VQdSrc), wb.Iops,
5987 iname, "$VQd, $Rn" # wb.syntax,
5988 wb.cstr # ",$VQdSrc = $VQd", pattern>;
// Store direction: the vector list is an input only.
5990 class MVE_vst24_base<MVE_vldst24_nvecs n, bits<2> pat, bits<2> size,
5991 MVE_vldst24_writeback wb, string iname,
5992 list<dag> pattern=[]>
5993 : MVE_vldst24_base<wb.writeback, n.bit0, pat, size, 0,
5994 wb.Oops, (ins n.VecList:$VQd), wb.Iops,
5995 iname, "$VQd, $Rn" # wb.syntax,
5998 // Actually define all the interleaving loads and stores, by a series
5999 // of nested foreaches over number of vectors (VLD2/VLD4); stage
6000 // within one of those series (VLDx0/VLDx1/VLDx2/VLDx3); size of
6001 // vector lane; writeback or no writeback.
6002 foreach n = [MVE_vldst24_nvecs<2, [0,1], 0, VecList2Q>,
6003 MVE_vldst24_nvecs<4, [0,1,2,3], 1, VecList4Q>] in
6004 foreach stage = n.stages in
6005 foreach s = [MVE_vldst24_lanesize< 8, 0b00>,
6006 MVE_vldst24_lanesize<16, 0b01>,
6007 MVE_vldst24_lanesize<32, 0b10>] in
// Writeback variant uses the no-SP address operand and ties the base
// register to the $wb output; non-writeback has no extra outputs.
6008 foreach wb = [MVE_vldst24_writeback<
6009 1, (outs rGPR:$wb), (ins t2_nosp_addr_offset_none:$Rn),
6010 "!", "$Rn.base = $wb", "_wb">,
6011 MVE_vldst24_writeback<0, (outs), (ins t2_addr_offset_none:$Rn)>] in {
6013 // For each case within all of those foreaches, define the actual
6014 // instructions. The def names are made by gluing together pieces
6015 // from all the parameter classes, and will end up being things like
6016 // MVE_VLD20_8 and MVE_VST43_16_wb.
6018 def "MVE_VLD" # n.nvecs # stage # "_" # s.lanesize # wb.id_suffix
6019 : MVE_vld24_base<n, stage, s.sizebits, wb,
6020 "vld" # n.nvecs # stage # "." # s.lanesize>;
6022 def "MVE_VST" # n.nvecs # stage # "_" # s.lanesize # wb.id_suffix
6023 : MVE_vst24_base<n, stage, s.sizebits, wb,
6024 "vst" # n.nvecs # stage # "." # s.lanesize>;
// SelectionDAG node types for the post-incrementing interleaved stores:
// operands are (chain-implied) addr, addr, increment, then 2 or 4 vectors
// of the same type, then the stage immediate.
6027 def SDTARMVST2 : SDTypeProfile<1, 5, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, SDTCisVec<3>,
6028 SDTCisSameAs<3, 4>, SDTCisVT<5, i32>]>;
6029 def SDTARMVST4 : SDTypeProfile<1, 7, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, SDTCisVec<3>,
6030 SDTCisSameAs<3, 4>, SDTCisSameAs<3, 5>,
6031 SDTCisSameAs<3, 6>, SDTCisVT<7, i32>]>;
6032 def MVEVST2UPD : SDNode<"ARMISD::VST2_UPD", SDTARMVST2, [SDNPHasChain, SDNPMemOperand]>;
6033 def MVEVST4UPD : SDNode<"ARMISD::VST4_UPD", SDTARMVST4, [SDNPHasChain, SDNPMemOperand]>;
// Selection patterns for the interleaving stores: map the vst2q/vst4q
// intrinsics (and the writeback DAG nodes above) onto the MVE_VST2x/MVE_VST4x
// instructions for one lane size / vector type, gathering the data vectors
// into a QQ / QQQQ super-register with REG_SEQUENCE.
6035 multiclass MVE_vst24_patterns<int lanesize, ValueType VT> {
// Non-writeback VST2: one pattern per exchange stage.
6036 foreach stage = [0,1] in
6037 def : Pat<(int_arm_mve_vst2q i32:$addr,
6038 (VT MQPR:$v0), (VT MQPR:$v1), (i32 stage)),
6039 (!cast<Instruction>("MVE_VST2"#stage#"_"#lanesize)
6040 (REG_SEQUENCE MQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1),
6041 t2_addr_offset_none:$addr)>;
// Writeback VST2: the (i32 32) operand is the fixed base increment (two
// 16-byte vectors). Note that '#_wb' pastes the undefined token _wb, which
// TableGen treats as the string "_wb", onto the instruction name.
6042 foreach stage = [0,1] in
6043 def : Pat<(i32 (MVEVST2UPD i32:$addr, (i32 32),
6044 (VT MQPR:$v0), (VT MQPR:$v1), (i32 stage))),
6045 (i32 (!cast<Instruction>("MVE_VST2"#stage#"_"#lanesize#_wb)
6046 (REG_SEQUENCE MQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1),
6047 t2_addr_offset_none:$addr))>;
// Non-writeback VST4: four exchange stages, four data vectors.
6049 foreach stage = [0,1,2,3] in
6050 def : Pat<(int_arm_mve_vst4q i32:$addr,
6051 (VT MQPR:$v0), (VT MQPR:$v1),
6052 (VT MQPR:$v2), (VT MQPR:$v3), (i32 stage)),
6053 (!cast<Instruction>("MVE_VST4"#stage#"_"#lanesize)
6054 (REG_SEQUENCE MQQQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1,
6055 VT:$v2, qsub_2, VT:$v3, qsub_3),
6056 t2_addr_offset_none:$addr)>;
// Writeback VST4: base increment is 64 (four 16-byte vectors).
6057 foreach stage = [0,1,2,3] in
6058 def : Pat<(i32 (MVEVST4UPD i32:$addr, (i32 64),
6059 (VT MQPR:$v0), (VT MQPR:$v1),
6060 (VT MQPR:$v2), (VT MQPR:$v3), (i32 stage))),
6061 (i32 (!cast<Instruction>("MVE_VST4"#stage#"_"#lanesize#_wb)
6062 (REG_SEQUENCE MQQQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1,
6063 VT:$v2, qsub_2, VT:$v3, qsub_3),
6064 t2_addr_offset_none:$addr))>;
// Instantiate the VST2/VST4 patterns for every legal lane size and element
// type (f16/f32 share the 16-/32-bit lane-size instructions with the ints).
6066 defm : MVE_vst24_patterns<8, v16i8>;
6067 defm : MVE_vst24_patterns<16, v8i16>;
6068 defm : MVE_vst24_patterns<32, v4i32>;
6069 defm : MVE_vst24_patterns<16, v8f16>;
6070 defm : MVE_vst24_patterns<32, v4f32>;
6072 // end of MVE interleaving load/store
6074 // start of MVE predicable load/store
6076 // A parameter class for the direction of transfer.
6077 class MVE_ldst_direction<bit b, dag Oo, dag Io, string c=""> {
6083 def MVE_ld: MVE_ldst_direction<1, (outs MQPR:$Qd), (ins), ",@earlyclobber $Qd">;
6084 def MVE_st: MVE_ldst_direction<0, (outs), (ins MQPR:$Qd)>;
6086 // A parameter class for the size of memory access in a load.
// A parameter class describing one memory-access size, bundling the encoding
// bits, offset scaling, mnemonic letter and assembly type suffixes.
6087 class MVE_memsz<bits<2> e, int s, AddrMode m, string mn, list<string> types> {
6088 bits<2> encoding = e; // opcode bit(s) for encoding
6089 int shift = s; // shift applied to immediate load offset
6092 // For instruction aliases: define the complete list of type
6093 // suffixes at this size, and the canonical ones for loads and
6095 string MnemonicLetter = mn; // e.g. the "h" in vldrh
6096 int TypeBits = !shl(8, s); // access width in bits (8 << shift)
6097 string CanonLoadSuffix = ".u" # TypeBits; // loads canonicalise to unsigned
6098 string CanonStoreSuffix = "." # TypeBits; // stores to the bare width
6099 list<string> suffixes = !foreach(letter, types, "." # letter # TypeBits);
6102 // Instances of MVE_memsz.
6104 // (memD doesn't need an AddrMode, because those are only for
6105 // contiguous loads, and memD is only used by gather/scatters.)
6106 def MVE_memB: MVE_memsz<0b00, 0, AddrModeT2_i7, "b", ["", "u", "s"]>;
6107 def MVE_memH: MVE_memsz<0b01, 1, AddrModeT2_i7s2, "h", ["", "u", "s", "f"]>;
6108 def MVE_memW: MVE_memsz<0b10, 2, AddrModeT2_i7s4, "w", ["", "u", "s", "f"]>;
6109 def MVE_memD: MVE_memsz<0b11, 3, ?, "d", ["", "u", "s", "f"]>;
6111 // This is the base class for all the MVE loads and stores other than
6112 // the interleaving ones. All the non-interleaving loads/stores share
6113 // the characteristic that they operate on just one vector register,
6114 // so they are VPT-predicable.
6116 // The predication operand is vpred_n, for both loads and stores. For
6117 // store instructions, the reason is obvious: if there is no output
6118 // register, there can't be a need for an input parameter giving the
6119 // output register's previous value. Load instructions also don't need
6120 // that input parameter, because unlike MVE data processing
6121 // instructions, predicated loads are defined to set the inactive
6122 // lanes of the output register to zero, instead of preserving their
// Base class for all non-interleaving MVE loads/stores. A single class
// serves both directions: 'dir' supplies the Qd operand as either an output
// (load) or an input (store), and everything direction-dependent below is
// derived from dir.load.
6124 class MVE_VLDRSTR_base<MVE_ldst_direction dir, bit U, bit P, bit W, bit opc,
6125 dag oops, dag iops, string asm, string suffix,
6126 string ops, string cstr, list<dag> pattern=[]>
6127 : MVE_p<oops, iops, NoItinerary, asm, suffix, ops, vpred_n, cstr, pattern> {
// Bit 20 distinguishes loads (1) from stores (0).
6135 let Inst{20} = dir.load;
6136 let Inst{15-13} = Qd{2-0};
6138 let Inst{11-9} = 0b111;
// MI-level flags follow the transfer direction.
6140 let mayLoad = dir.load;
6141 let mayStore = !eq(dir.load,0);
6142 let hasSideEffects = 0;
// Single-register MVE loads/stores are all usable in tail-predicated loops.
6143 let validForTailPredication = 1;
6146 // Contiguous load and store instructions. These come in two main
6147 // categories: same-size loads/stores in which 128 bits of vector
6148 // register is transferred to or from 128 bits of memory in the most
6149 // obvious way, and widening loads / narrowing stores, in which the
6150 // size of memory accessed is less than the size of a vector register,
6151 // so the load instructions sign- or zero-extend each memory value
6152 // into a wider vector lane, and the store instructions truncate
6155 // The instruction mnemonics for these two classes look reasonably
6156 // similar, but the actual encodings are different enough to need two
6157 // separate base classes.
6159 // Contiguous, same size
// Contiguous, same size
6160 class MVE_VLDRSTR_cs<MVE_ldst_direction dir, MVE_memsz memsz, bit P, bit W,
6161 dag oops, dag iops, string asm, string suffix,
6162 IndexMode im, string ops, string cstr>
6163 : MVE_VLDRSTR_base<dir, 0, P, W, 1, oops, iops, asm, suffix, ops, cstr> {
// The encoded $addr operand packs the base register and the 7-bit immediate
// offset; bit 7 of it goes to Inst{23} (NOTE(review): presumably the offset
// add/subtract bit — confirm against the ArmARM encoding diagram).
6165 let Inst{23} = addr{7};
6166 let Inst{19-16} = addr{11-8};
6167 let Inst{8-7} = memsz.encoding;
6168 let Inst{6-0} = addr{6-0};
6171 // Contiguous, widening/narrowing
// Contiguous, widening/narrowing
6172 class MVE_VLDRSTR_cw<MVE_ldst_direction dir, MVE_memsz memsz, bit U,
6173 bit P, bit W, bits<2> size, dag oops, dag iops,
6174 string asm, string suffix, IndexMode im,
6175 string ops, string cstr>
6176 : MVE_VLDRSTR_base<dir, U, P, W, 0, oops, iops, asm, suffix, ops, cstr> {
6178 let Inst{23} = addr{7};
6179 let Inst{19} = memsz.encoding{0}; // enough to tell 16- from 32-bit
// Only three bits of base register here: this encoding can address a
// low (tGPR) base only, unlike the same-size form's four-bit field.
6180 let Inst{18-16} = addr{10-8};
6181 let Inst{8-7} = size;
6182 let Inst{6-0} = addr{6-0};
6187 // Multiclass wrapper on each of the _cw and _cs base classes, to
6188 // generate three writeback modes (none, preindex, postindex).
// Wrapper generating the three writeback variants of a widening/narrowing
// contiguous load/store: plain offset (""), pre-indexed (_pre) and
// post-indexed (_post).
6190 multiclass MVE_VLDRSTR_cw_m<MVE_ldst_direction dir, MVE_memsz memsz,
6191 string asm, string suffix, bit U, bits<2> size> {
6192 let AM = memsz.AM in {
// [Rn, #imm]: no writeback.
6193 def "" : MVE_VLDRSTR_cw<
6194 dir, memsz, U, 1, 0, size,
6195 dir.Oops, !con(dir.Iops, (ins taddrmode_imm7<memsz.shift>:$addr)),
6196 asm, suffix, IndexModeNone, "$Qd, $addr", "">;
// [Rn, #imm]!: pre-indexed; the updated base comes back in $wb, which is
// constrained to be the same register as the address base.
6198 def _pre : MVE_VLDRSTR_cw<
6199 dir, memsz, U, 1, 1, size,
6200 !con((outs tGPR:$wb), dir.Oops),
6201 !con(dir.Iops, (ins taddrmode_imm7<memsz.shift>:$addr)),
6202 asm, suffix, IndexModePre, "$Qd, $addr!", "$addr.base = $wb"> {
6203 let DecoderMethod = "DecodeMVE_MEM_1_pre<"#memsz.shift#">";
// [Rn], #imm: post-indexed; the base register and offset are separate
// operands, and the base is updated after the access.
6206 def _post : MVE_VLDRSTR_cw<
6207 dir, memsz, U, 0, 1, size,
6208 !con((outs tGPR:$wb), dir.Oops),
6209 !con(dir.Iops, (ins t_addr_offset_none:$Rn,
6210 t2am_imm7_offset<memsz.shift>:$addr)),
6211 asm, suffix, IndexModePost, "$Qd, $Rn$addr", "$Rn.base = $wb"> {
// tGPR base: only 3 encoding bits needed.
6213 let Inst{18-16} = Rn{2-0};
// Same three-variant wrapper as MVE_VLDRSTR_cw_m, but for the same-size
// contiguous encodings, whose wider base-register field takes rGPR.
6218 multiclass MVE_VLDRSTR_cs_m<MVE_ldst_direction dir, MVE_memsz memsz,
6219 string asm, string suffix> {
6220 let AM = memsz.AM in {
// [Rn, #imm]: no writeback.
6221 def "" : MVE_VLDRSTR_cs<
6223 dir.Oops, !con(dir.Iops, (ins t2addrmode_imm7<memsz.shift>:$addr)),
6224 asm, suffix, IndexModeNone, "$Qd, $addr", "">;
// [Rn, #imm]!: pre-indexed with tied writeback register.
6226 def _pre : MVE_VLDRSTR_cs<
6228 !con((outs rGPR:$wb), dir.Oops),
6229 !con(dir.Iops, (ins t2addrmode_imm7_pre<memsz.shift>:$addr)),
6230 asm, suffix, IndexModePre, "$Qd, $addr!", "$addr.base = $wb"> {
6231 let DecoderMethod = "DecodeMVE_MEM_2_pre<"#memsz.shift#">";
// [Rn], #imm: post-indexed; base (no SP) and offset as separate operands.
6234 def _post : MVE_VLDRSTR_cs<
6236 !con((outs rGPR:$wb), dir.Oops),
6237 !con(dir.Iops, (ins t2_nosp_addr_offset_none:$Rn,
6238 t2am_imm7_offset<memsz.shift>:$addr)),
6239 asm, suffix, IndexModePost, "$Qd, $Rn$addr", "$Rn.base = $wb"> {
// rGPR base: full 4-bit register field.
6241 let Inst{19-16} = Rn{3-0};
6246 // Now actually declare all the contiguous load/stores, via those
6247 // multiclasses. The instruction ids coming out of this are the bare
6248 // names shown in the defm, with _pre or _post appended for writeback,
6249 // e.g. MVE_VLDRBS16, MVE_VSTRB16_pre, MVE_VSTRHU16_post.
6251 defm MVE_VLDRBS16: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memB, "vldrb", "s16", 0, 0b01>;
6252 defm MVE_VLDRBS32: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memB, "vldrb", "s32", 0, 0b10>;
6253 defm MVE_VLDRBU16: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memB, "vldrb", "u16", 1, 0b01>;
6254 defm MVE_VLDRBU32: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memB, "vldrb", "u32", 1, 0b10>;
6255 defm MVE_VLDRHS32: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memH, "vldrh", "s32", 0, 0b10>;
6256 defm MVE_VLDRHU32: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memH, "vldrh", "u32", 1, 0b10>;
6258 defm MVE_VLDRBU8: MVE_VLDRSTR_cs_m<MVE_ld, MVE_memB, "vldrb", "u8">;
6259 defm MVE_VLDRHU16: MVE_VLDRSTR_cs_m<MVE_ld, MVE_memH, "vldrh", "u16">;
6260 defm MVE_VLDRWU32: MVE_VLDRSTR_cs_m<MVE_ld, MVE_memW, "vldrw", "u32">;
6262 defm MVE_VSTRB16: MVE_VLDRSTR_cw_m<MVE_st, MVE_memB, "vstrb", "16", 0, 0b01>;
6263 defm MVE_VSTRB32: MVE_VLDRSTR_cw_m<MVE_st, MVE_memB, "vstrb", "32", 0, 0b10>;
6264 defm MVE_VSTRH32: MVE_VLDRSTR_cw_m<MVE_st, MVE_memH, "vstrh", "32", 0, 0b10>;
6266 defm MVE_VSTRBU8 : MVE_VLDRSTR_cs_m<MVE_st, MVE_memB, "vstrb", "8">;
6267 defm MVE_VSTRHU16: MVE_VLDRSTR_cs_m<MVE_st, MVE_memH, "vstrh", "16">;
6268 defm MVE_VSTRWU32: MVE_VLDRSTR_cs_m<MVE_st, MVE_memW, "vstrw", "32">;
6270 // Gather loads / scatter stores whose address operand is of the form
6271 // [Rn,Qm], i.e. a single GPR as the common base address, plus a
6272 // vector of offset from it. ('Load/store this sequence of elements of
6273 // the same array.')
6275 // Like the contiguous family, these loads and stores can widen the
6276 // loaded values / truncate the stored ones, or they can just
6277 // load/store the same size of memory and vector lane. But unlike the
6278 // contiguous family, there's no particular difference in encoding
6279 // between those two cases.
6281 // This family also comes with the option to scale the offset values
6282 // in Qm by the size of the loaded memory (i.e. to treat them as array
6283 // indices), or not to scale them (to treat them as plain byte offsets
6284 // in memory, so that perhaps the loaded values are unaligned). The
6285 // scaled instructions' address operand in assembly looks like
6286 // [Rn,Qm,UXTW #2] or similar.
6289 class MVE_VLDRSTR_rq<MVE_ldst_direction dir, MVE_memsz memsz, bit U,
6290 bits<2> size, bit os, string asm, string suffix, int shift>
6291 : MVE_VLDRSTR_base<dir, U, 0b0, 0b0, 0, dir.Oops,
6292 !con(dir.Iops, (ins mve_addr_rq_shift<shift>:$addr)),
6293 asm, suffix, "$Qd, $addr", dir.cstr> {
6296 let Inst{19-16} = addr{6-3};
6297 let Inst{8-7} = size;
6298 let Inst{6} = memsz.encoding{1};
6300 let Inst{4} = memsz.encoding{0};
6301 let Inst{3-1} = addr{2-0};
6305 // Multiclass that defines the scaled and unscaled versions of an
6306 // instruction, when the memory size is wider than a byte. The scaled
6307 // version gets the default name like MVE_VLDRBU16_rq; the unscaled /
6308 // potentially unaligned version gets a "_u" suffix, e.g.
6309 // MVE_VLDRBU16_rq_u.
6310 multiclass MVE_VLDRSTR_rq_w<MVE_ldst_direction dir, MVE_memsz memsz,
6311 string asm, string suffix, bit U, bits<2> size> {
6312 def _u : MVE_VLDRSTR_rq<dir, memsz, U, size, 0, asm, suffix, 0>;
6313 def "" : MVE_VLDRSTR_rq<dir, memsz, U, size, 1, asm, suffix, memsz.shift>;
6316 // Subclass of MVE_VLDRSTR_rq with the same API as that multiclass,
6317 // for use when the memory size is one byte, so there's no 'scaled'
6318 // version of the instruction at all. (This is encoded as if it were
6319 // unscaled, but named in the default way with no _u suffix.)
6320 class MVE_VLDRSTR_rq_b<MVE_ldst_direction dir, MVE_memsz memsz,
6321 string asm, string suffix, bit U, bits<2> size>
6322 : MVE_VLDRSTR_rq<dir, memsz, U, size, 0, asm, suffix, 0>;
6324 // Multiclasses wrapping that to add ISel patterns for intrinsics.
6325 multiclass MVE_VLDR_rq_w<MVE_memsz memsz, list<MVEVectorVTInfo> VTIs> {
6326 defm "": MVE_VLDRSTR_rq_w<MVE_ld, memsz, "vldr" # memsz.MnemonicLetter,
6327 VTIs[0].Suffix, VTIs[0].Unsigned, VTIs[0].Size>;
6328 defvar Inst = !cast<Instruction>(NAME);
6329 defvar InstU = !cast<Instruction>(NAME # "_u");
6331 foreach VTI = VTIs in
6332 foreach UnsignedFlag = !if(!eq(VTI.Size, memsz.encoding),
6333 [0,1], [VTI.Unsigned]) in {
6334 def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, 0, UnsignedFlag)),
6335 (VTI.Vec (InstU GPR:$base, MQPR:$offsets))>;
6336 def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, memsz.shift, UnsignedFlag)),
6337 (VTI.Vec (Inst GPR:$base, MQPR:$offsets))>;
6338 def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, 0, UnsignedFlag, (VTI.Pred VCCR:$pred))),
6339 (VTI.Vec (InstU GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>;
6340 def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, memsz.shift, UnsignedFlag, (VTI.Pred VCCR:$pred))),
6341 (VTI.Vec (Inst GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>;
6344 multiclass MVE_VLDR_rq_b<list<MVEVectorVTInfo> VTIs> {
6345 def "": MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb",
6346 VTIs[0].Suffix, VTIs[0].Unsigned, VTIs[0].Size>;
6347 defvar Inst = !cast<Instruction>(NAME);
6349 foreach VTI = VTIs in {
6350 def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), 8, 0, VTI.Unsigned)),
6351 (VTI.Vec (Inst GPR:$base, MQPR:$offsets))>;
6352 def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), 8, 0, VTI.Unsigned, (VTI.Pred VCCR:$pred))),
6353 (VTI.Vec (Inst GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>;
6356 multiclass MVE_VSTR_rq_w<MVE_memsz memsz, list<MVEVectorVTInfo> VTIs> {
6357 defm "": MVE_VLDRSTR_rq_w<MVE_st, memsz, "vstr" # memsz.MnemonicLetter,
6358 VTIs[0].BitsSuffix, 0, VTIs[0].Size>;
6359 defvar Inst = !cast<Instruction>(NAME);
6360 defvar InstU = !cast<Instruction>(NAME # "_u");
6362 foreach VTI = VTIs in {
6363 def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, 0),
6364 (InstU MQPR:$data, GPR:$base, MQPR:$offsets)>;
6365 def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, memsz.shift),
6366 (Inst MQPR:$data, GPR:$base, MQPR:$offsets)>;
6367 def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, 0, (VTI.Pred VCCR:$pred)),
6368 (InstU MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>;
6369 def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, memsz.shift, (VTI.Pred VCCR:$pred)),
6370 (Inst MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>;
6373 multiclass MVE_VSTR_rq_b<list<MVEVectorVTInfo> VTIs> {
6374 def "": MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb",
6375 VTIs[0].BitsSuffix, 0, VTIs[0].Size>;
6376 defvar Inst = !cast<Instruction>(NAME);
6378 foreach VTI = VTIs in {
6379 def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), 8, 0),
6380 (Inst MQPR:$data, GPR:$base, MQPR:$offsets)>;
6381 def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), 8, 0, (VTI.Pred VCCR:$pred)),
6382 (Inst MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>;
6386 // Actually define all the loads and stores in this family.
6388 defm MVE_VLDRBU8_rq : MVE_VLDR_rq_b<[MVE_v16u8,MVE_v16s8]>;
6389 defm MVE_VLDRBU16_rq: MVE_VLDR_rq_b<[MVE_v8u16]>;
6390 defm MVE_VLDRBS16_rq: MVE_VLDR_rq_b<[MVE_v8s16]>;
6391 defm MVE_VLDRBU32_rq: MVE_VLDR_rq_b<[MVE_v4u32]>;
6392 defm MVE_VLDRBS32_rq: MVE_VLDR_rq_b<[MVE_v4s32]>;
6394 defm MVE_VLDRHU16_rq: MVE_VLDR_rq_w<MVE_memH, [MVE_v8u16,MVE_v8s16,MVE_v8f16]>;
6395 defm MVE_VLDRHU32_rq: MVE_VLDR_rq_w<MVE_memH, [MVE_v4u32]>;
6396 defm MVE_VLDRHS32_rq: MVE_VLDR_rq_w<MVE_memH, [MVE_v4s32]>;
6397 defm MVE_VLDRWU32_rq: MVE_VLDR_rq_w<MVE_memW, [MVE_v4u32,MVE_v4s32,MVE_v4f32]>;
6398 defm MVE_VLDRDU64_rq: MVE_VLDR_rq_w<MVE_memD, [MVE_v2u64,MVE_v2s64]>;
6400 defm MVE_VSTRB8_rq : MVE_VSTR_rq_b<[MVE_v16i8]>;
6401 defm MVE_VSTRB16_rq : MVE_VSTR_rq_b<[MVE_v8i16]>;
6402 defm MVE_VSTRB32_rq : MVE_VSTR_rq_b<[MVE_v4i32]>;
6404 defm MVE_VSTRH16_rq : MVE_VSTR_rq_w<MVE_memH, [MVE_v8i16,MVE_v8f16]>;
6405 defm MVE_VSTRH32_rq : MVE_VSTR_rq_w<MVE_memH, [MVE_v4i32]>;
6406 defm MVE_VSTRW32_rq : MVE_VSTR_rq_w<MVE_memW, [MVE_v4i32,MVE_v4f32]>;
6407 defm MVE_VSTRD64_rq : MVE_VSTR_rq_w<MVE_memD, [MVE_v2i64]>;
6409 // Gather loads / scatter stores whose address operand is of the form
6410 // [Qm,#imm], i.e. a vector containing a full base address for each
6411 // loaded item, plus an immediate offset applied consistently to all
6412 // of them. ('Load/store the same field from this vector of pointers
6413 // to a structure type.')
6415 // This family requires the vector lane size to be at least 32 bits
6416 // (so there's room for an address in each lane at all). It has no
6417 // widening/narrowing variants. But it does support preindex
6418 // writeback, in which the address vector is updated to hold the
6419 // addresses actually loaded from.
// Gather/scatter with [Qm, #imm] addressing: each lane of Qm holds a full
// base address and the immediate is applied to all of them. 'wbops'
// optionally prepends the writeback output for the pre-indexed form.
6422 class MVE_VLDRSTR_qi<MVE_ldst_direction dir, MVE_memsz memsz, bit W, dag wbops,
6423 string asm, string wbAsm, string suffix, string cstr = "">
6424 : MVE_VLDRSTR_base<dir, 1, 1, W, 1, !con(wbops, dir.Oops),
6425 !con(dir.Iops, (ins mve_addr_q_shift<memsz.shift>:$addr)),
6426 asm, suffix, "$Qd, $addr" # wbAsm, cstr # dir.cstr> {
// The encoded $addr operand packs the Qm register and the scaled offset.
6428 let Inst{23} = addr{7};
6429 let Inst{19-17} = addr{10-8};
6431 let Inst{8} = memsz.encoding{0}; // enough to distinguish 32- from 64-bit
6433 let Inst{6-0} = addr{6-0};
6436 // Multiclass that generates the non-writeback and writeback variants.
6437 multiclass MVE_VLDRSTR_qi_m<MVE_ldst_direction dir, MVE_memsz memsz,
6438 string asm, string suffix> {
6439 def "" : MVE_VLDRSTR_qi<dir, memsz, 0, (outs), asm, "", suffix>;
6440 def _pre : MVE_VLDRSTR_qi<dir, memsz, 1, (outs MQPR:$wb), asm, "!", suffix,
6441 "$addr.base = $wb"> {
6442 let DecoderMethod="DecodeMVE_MEM_3_pre<"#memsz.shift#">";
6446 // Multiclasses wrapping that one, adding selection patterns for the
6447 // non-writeback loads and all the stores. (The writeback loads must
6448 // deliver multiple output values, so they have to be selected by C++
6450 multiclass MVE_VLDR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI,
6451 list<MVEVectorVTInfo> DVTIs> {
6452 defm "" : MVE_VLDRSTR_qi_m<MVE_ld, memsz, "vldr" # memsz.MnemonicLetter,
6453 "u" # memsz.TypeBits>;
6454 defvar Inst = !cast<Instruction>(NAME);
6456 foreach DVTI = DVTIs in {
6457 def : Pat<(DVTI.Vec (int_arm_mve_vldr_gather_base
6458 (AVTI.Vec MQPR:$addr), (i32 imm:$offset))),
6459 (DVTI.Vec (Inst (AVTI.Vec MQPR:$addr), (i32 imm:$offset)))>;
6460 def : Pat<(DVTI.Vec (int_arm_mve_vldr_gather_base_predicated
6461 (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (AVTI.Pred VCCR:$pred))),
6462 (DVTI.Vec (Inst (AVTI.Vec MQPR:$addr), (i32 imm:$offset),
6463 ARMVCCThen, VCCR:$pred))>;
6466 multiclass MVE_VSTR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI,
6467 list<MVEVectorVTInfo> DVTIs> {
6468 defm "" : MVE_VLDRSTR_qi_m<MVE_st, memsz, "vstr" # memsz.MnemonicLetter,
6469 !cast<string>(memsz.TypeBits)>;
6470 defvar Inst = !cast<Instruction>(NAME);
6471 defvar InstPre = !cast<Instruction>(NAME # "_pre");
6473 foreach DVTI = DVTIs in {
6474 def : Pat<(int_arm_mve_vstr_scatter_base
6475 (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data)),
6476 (Inst (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
6477 (i32 imm:$offset))>;
6478 def : Pat<(int_arm_mve_vstr_scatter_base_predicated
6479 (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data), (AVTI.Pred VCCR:$pred)),
6480 (Inst (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
6481 (i32 imm:$offset), ARMVCCThen, VCCR:$pred)>;
6482 def : Pat<(AVTI.Vec (int_arm_mve_vstr_scatter_base_wb
6483 (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data))),
6484 (AVTI.Vec (InstPre (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
6485 (i32 imm:$offset)))>;
6486 def : Pat<(AVTI.Vec (int_arm_mve_vstr_scatter_base_wb_predicated
6487 (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data), (AVTI.Pred VCCR:$pred))),
6488 (AVTI.Vec (InstPre (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
6489 (i32 imm:$offset), ARMVCCThen, VCCR:$pred))>;
6493 // Actual instruction definitions.
6494 defm MVE_VLDRWU32_qi: MVE_VLDR_qi<MVE_memW, MVE_v4i32, [MVE_v4i32,MVE_v4f32]>;
6495 defm MVE_VLDRDU64_qi: MVE_VLDR_qi<MVE_memD, MVE_v2i64, [MVE_v2i64,MVE_v2f64]>;
6496 defm MVE_VSTRW32_qi: MVE_VSTR_qi<MVE_memW, MVE_v4i32, [MVE_v4i32,MVE_v4f32]>;
6497 defm MVE_VSTRD64_qi: MVE_VSTR_qi<MVE_memD, MVE_v2i64, [MVE_v2i64,MVE_v2f64]>;
6499 // Define aliases for all the instructions where memory size and
6500 // vector lane size are the same. These are mnemonic aliases, so they
6501 // apply consistently across all of the above families - contiguous
6502 // loads, and both the rq and qi types of gather/scatter.
6504 // Rationale: As long as you're loading (for example) 16-bit memory
6505 // values into 16-bit vector lanes, you can think of them as signed or
6506 // unsigned integers, fp16 or just raw 16-bit blobs and it makes no
6507 // difference. So we permit all of vldrh.16, vldrh.u16, vldrh.s16,
6508 // vldrh.f16 and treat them all as equivalent to the canonical
6509 // spelling (which happens to be .u16 for loads, and just .16 for
6512 foreach vpt_cond = ["", "t", "e"] in
6513 foreach memsz = [MVE_memB, MVE_memH, MVE_memW, MVE_memD] in
6514 foreach suffix = memsz.suffixes in {
6515 // Define an alias with every suffix in the list, except for the one
6516 // used by the real Instruction record (i.e. the one that all the
6517 // rest are aliases *for*).
6519 if !ne(suffix, memsz.CanonLoadSuffix) then {
6520 def : MnemonicAlias<
6521 "vldr" # memsz.MnemonicLetter # vpt_cond # suffix,
6522 "vldr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonLoadSuffix>;
6525 if !ne(suffix, memsz.CanonStoreSuffix) then {
6526 def : MnemonicAlias<
6527 "vstr" # memsz.MnemonicLetter # vpt_cond # suffix,
6528 "vstr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonStoreSuffix>;
6532 // end of MVE predicable load/store
// Base class for the integer VPT (vector predicate then/else) instructions.
// The 4-bit then/else mask Mk and the 3-bit condition code fc are both
// scattered across the encoding: Mk in bits {22, 15-13}, fc in bits
// {12, 7} here with its remaining bit set by the subclasses.
6534 class MVE_VPT<string suffix, bits<2> size, dag iops, string asm, list<dag> pattern=[]>
6535 : MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm, "", pattern> {
6540 let Inst{31-23} = 0b111111100;
6541 let Inst{22} = Mk{3};
6542 let Inst{21-20} = size;
6543 let Inst{19-17} = Qn{2-0};
6545 let Inst{15-13} = Mk{2-0};
6546 let Inst{12} = fc{2};
6547 let Inst{11-8} = 0b1111;
6548 let Inst{7} = fc{0};
// VPT blocks can still be converted to tail predication.
6552 let validForTailPredication=1;
6555 class MVE_VPTt1<string suffix, bits<2> size, dag iops>
6556 : MVE_VPT<suffix, size, iops, "$fc, $Qn, $Qm"> {
6561 let Inst{5} = Qm{3};
6562 let Inst{3-1} = Qm{2-0};
6563 let Inst{0} = fc{1};
6566 class MVE_VPTt1i<string suffix, bits<2> size>
6567 : MVE_VPTt1<suffix, size,
6568 (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_i:$fc)> {
6573 def MVE_VPTv4i32 : MVE_VPTt1i<"i32", 0b10>;
6574 def MVE_VPTv8i16 : MVE_VPTt1i<"i16", 0b01>;
6575 def MVE_VPTv16i8 : MVE_VPTt1i<"i8", 0b00>;
6577 class MVE_VPTt1u<string suffix, bits<2> size>
6578 : MVE_VPTt1<suffix, size,
6579 (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_u:$fc)> {
6584 def MVE_VPTv4u32 : MVE_VPTt1u<"u32", 0b10>;
6585 def MVE_VPTv8u16 : MVE_VPTt1u<"u16", 0b01>;
6586 def MVE_VPTv16u8 : MVE_VPTt1u<"u8", 0b00>;
6588 class MVE_VPTt1s<string suffix, bits<2> size>
6589 : MVE_VPTt1<suffix, size,
6590 (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_s:$fc)> {
6594 def MVE_VPTv4s32 : MVE_VPTt1s<"s32", 0b10>;
6595 def MVE_VPTv8s16 : MVE_VPTt1s<"s16", 0b01>;
6596 def MVE_VPTv16s8 : MVE_VPTt1s<"s8", 0b00>;
6598 class MVE_VPTt2<string suffix, bits<2> size, dag iops>
6599 : MVE_VPT<suffix, size, iops,
6606 let Inst{5} = fc{1};
6607 let Inst{3-0} = Rm{3-0};
6610 class MVE_VPTt2i<string suffix, bits<2> size>
6611 : MVE_VPTt2<suffix, size,
6612 (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_i:$fc)> {
6617 def MVE_VPTv4i32r : MVE_VPTt2i<"i32", 0b10>;
6618 def MVE_VPTv8i16r : MVE_VPTt2i<"i16", 0b01>;
6619 def MVE_VPTv16i8r : MVE_VPTt2i<"i8", 0b00>;
6621 class MVE_VPTt2u<string suffix, bits<2> size>
6622 : MVE_VPTt2<suffix, size,
6623 (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_u:$fc)> {
6628 def MVE_VPTv4u32r : MVE_VPTt2u<"u32", 0b10>;
6629 def MVE_VPTv8u16r : MVE_VPTt2u<"u16", 0b01>;
6630 def MVE_VPTv16u8r : MVE_VPTt2u<"u8", 0b00>;
6632 class MVE_VPTt2s<string suffix, bits<2> size>
6633 : MVE_VPTt2<suffix, size,
6634 (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_s:$fc)> {
6638 def MVE_VPTv4s32r : MVE_VPTt2s<"s32", 0b10>;
6639 def MVE_VPTv8s16r : MVE_VPTt2s<"s16", 0b01>;
6640 def MVE_VPTv16s8r : MVE_VPTt2s<"s8", 0b00>;
6643 class MVE_VPTf<string suffix, bit size, dag iops, string asm, list<dag> pattern=[]>
6644 : MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm,
6650 let Inst{31-29} = 0b111;
6651 let Inst{28} = size;
6652 let Inst{27-23} = 0b11100;
6653 let Inst{22} = Mk{3};
6654 let Inst{21-20} = 0b11;
6655 let Inst{19-17} = Qn{2-0};
6657 let Inst{15-13} = Mk{2-0};
6658 let Inst{12} = fc{2};
6659 let Inst{11-8} = 0b1111;
6660 let Inst{7} = fc{0};
6664 let Predicates = [HasMVEFloat];
6665 let validForTailPredication=1;
6668 class MVE_VPTft1<string suffix, bit size>
6669 : MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_fp:$fc),
6675 let Inst{5} = Qm{3};
6676 let Inst{3-1} = Qm{2-0};
6677 let Inst{0} = fc{1};
6680 def MVE_VPTv4f32 : MVE_VPTft1<"f32", 0b0>;
6681 def MVE_VPTv8f16 : MVE_VPTft1<"f16", 0b1>;
6683 class MVE_VPTft2<string suffix, bit size>
6684 : MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_fp:$fc),
6690 let Inst{5} = fc{1};
6691 let Inst{3-0} = Rm{3-0};
6694 def MVE_VPTv4f32r : MVE_VPTft2<"f32", 0b0>;
6695 def MVE_VPTv8f16r : MVE_VPTft2<"f16", 0b1>;
6697 def MVE_VPST : MVE_MI<(outs ), (ins vpt_mask:$Mk), NoItinerary,
6698 !strconcat("vpst", "${Mk}"), "", "", []> {
6701 let Inst{31-23} = 0b111111100;
6702 let Inst{22} = Mk{3};
6703 let Inst{21-16} = 0b110001;
6704 let Inst{15-13} = Mk{2-0};
6705 let Inst{12-0} = 0b0111101001101;
6706 let Unpredictable{12} = 0b1;
6707 let Unpredictable{7} = 0b1;
6708 let Unpredictable{5} = 0b1;
6711 let validForTailPredication = 1;
6714 def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary,
6715 "vpsel", "", "$Qd, $Qn, $Qm", vpred_n, "", []> {
6721 let Inst{25-23} = 0b100;
6722 let Inst{22} = Qd{3};
6723 let Inst{21-20} = 0b11;
6724 let Inst{19-17} = Qn{2-0};
6726 let Inst{15-13} = Qd{2-0};
6727 let Inst{12-9} = 0b0111;
6729 let Inst{7} = Qn{3};
6731 let Inst{5} = Qm{3};
6733 let Inst{3-1} = Qm{2-0};
6737 foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32",
6738 "i8", "i16", "i32", "f16", "f32"] in
6739 def : MVEInstAlias<"vpsel${vp}." # suffix # "\t$Qd, $Qn, $Qm",
6740 (MVE_VPSEL MQPR:$Qd, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
6742 let Predicates = [HasMVEInt] in {
// vselect on an existing predicate register maps directly to VPSEL.
6743 def : Pat<(v16i8 (vselect (v16i1 VCCR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
6744 (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
6745 def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))),
6746 (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
6747 def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
6748 (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
6750 def : Pat<(v8f16 (vselect (v8i1 VCCR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
6751 (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
6752 def : Pat<(v4f32 (vselect (v4i1 VCCR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
6753 (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
// vselect whose mask is an ordinary vector rather than a predicate:
// materialise a predicate first by comparing the mask against zero (NE).
6755 def : Pat<(v16i8 (vselect (v16i8 MQPR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
6756 (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
6757 (MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), ARMCCne)))>;
6758 def : Pat<(v8i16 (vselect (v8i16 MQPR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))),
6759 (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
6760 (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne)))>;
6761 def : Pat<(v4i32 (vselect (v4i32 MQPR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
6762 (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
6763 (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne)))>;
6765 def : Pat<(v8f16 (vselect (v8i16 MQPR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
6766 (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
6767 (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne)))>;
6768 def : Pat<(v4f32 (vselect (v4i32 MQPR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
6769 (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
6770 (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne)))>;
// zext of a predicate: select 1 or 0 per lane.
6773 def : Pat<(v16i8 (zext (v16i1 VCCR:$pred))),
6774 (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>;
6775 def : Pat<(v8i16 (zext (v8i1 VCCR:$pred))),
6776 (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>;
6777 def : Pat<(v4i32 (zext (v4i1 VCCR:$pred))),
6778 (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>;
// sext of a predicate: select all-ones per lane. VMOV.i8 #255 splats 0xff
// into every byte, which is -1 at any lane size, so the i8 immediate form
// is deliberately used for the 16- and 32-bit lane patterns too.
6780 def : Pat<(v16i8 (sext (v16i1 VCCR:$pred))),
6781 (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>;
6782 def : Pat<(v8i16 (sext (v8i1 VCCR:$pred))),
6783 (v8i16 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>;
6784 def : Pat<(v4i32 (sext (v4i1 VCCR:$pred))),
6785 (v4i32 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>;
// anyext: any well-defined value will do; reuse the zext (1/0) selection.
6787 def : Pat<(v16i8 (anyext (v16i1 VCCR:$pred))),
6788 (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>;
6789 def : Pat<(v8i16 (anyext (v8i1 VCCR:$pred))),
6790 (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>;
6791 def : Pat<(v4i32 (anyext (v4i1 VCCR:$pred))),
6792 (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>;
6795 let Predicates = [HasMVEFloat] in {
// int-to-fp from a boolean predicate selects between an FP constant and
// 0.0 per lane; the magic numbers below are the 8-bit modified-immediate
// encodings used by the VMOV-immediate instructions for those constants.
6797 // 112 is 1.0 in float
6798 def : Pat<(v4f32 (uint_to_fp (v4i1 VCCR:$pred))),
6799 (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), ARMVCCNone, VCCR:$pred))>;
6800 // 2620 is 1.0 in half
6801 def : Pat<(v8f16 (uint_to_fp (v8i1 VCCR:$pred))),
6802 (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), ARMVCCNone, VCCR:$pred))>;
6803 // 240 is -1.0 in float
6804 def : Pat<(v4f32 (sint_to_fp (v4i1 VCCR:$pred))),
6805 (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), ARMVCCNone, VCCR:$pred))>;
6806 // 2748 is -1.0 in half
6807 def : Pat<(v8f16 (sint_to_fp (v8i1 VCCR:$pred))),
6808 (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), ARMVCCNone, VCCR:$pred))>;
// fp-to-int producing a one-bit predicate: compare the FP vector against
// zero (NE), i.e. the lane is true iff the value is non-zero.
6810 def : Pat<(v4i1 (fp_to_uint (v4f32 MQPR:$v1))),
6811 (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, ARMCCne))>;
6812 def : Pat<(v8i1 (fp_to_uint (v8f16 MQPR:$v1))),
6813 (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, ARMCCne))>;
6814 def : Pat<(v4i1 (fp_to_sint (v4f32 MQPR:$v1))),
6815 (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, ARMCCne))>;
6816 def : Pat<(v8i1 (fp_to_sint (v8f16 MQPR:$v1))),
6817 (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, ARMCCne))>;
// VPNOT: invert the VPR.P0 predicate. Modelled with the predicate as an
// explicit input/output operand pair; the encoding itself is fixed.
6820 def MVE_VPNOT : MVE_p<(outs VCCR:$P0), (ins VCCR:$P0_in), NoItinerary,
6821 "vpnot", "", "", vpred_n, "", []> {
6822 let Inst{31-0} = 0b11111110001100010000111101001101;
6823 let Unpredictable{19-17} = 0b111;
6824 let Unpredictable{12} = 0b1;
6825 let Unpredictable{7} = 0b1;
6826 let Unpredictable{5} = 0b1;
// NOTE(review): clears the constraint string inherited from the base class;
// presumably so $P0 and $P0_in are not tied by it — confirm against MVE_p.
6828 let Constraints = "";
6829 let DecoderMethod = "DecodeMVEVPNOT";
6832 let Predicates = [HasMVEInt] in {
// XOR with the all-lanes-true predicate (predicate_cast of 0xFFFF, i.e.
// all 16 bits of P0 set) is a logical NOT of the predicate -> VPNOT.
6833 def : Pat<(v4i1 (xor (v4i1 VCCR:$pred), (v4i1 (predicate_cast (i32 65535))))),
6834 (v4i1 (MVE_VPNOT (v4i1 VCCR:$pred)))>;
6835 def : Pat<(v8i1 (xor (v8i1 VCCR:$pred), (v8i1 (predicate_cast (i32 65535))))),
6836 (v8i1 (MVE_VPNOT (v8i1 VCCR:$pred)))>;
6837 def : Pat<(v16i1 (xor (v16i1 VCCR:$pred), (v16i1 (predicate_cast (i32 65535))))),
6838 (v16i1 (MVE_VPNOT (v16i1 VCCR:$pred)))>;
// Tail-predicated low-overhead loop "start" instructions.
// The common base puts the element size in Inst{21-20} and the element-count
// register Rn in Inst{19-16}; $LR receives the loop count.
6842 class MVE_loltp_start<dag iops, string asm, string ops, bits<2> size>
6843 : t2LOL<(outs GPRlr:$LR), iops, asm, ops> {
6845 let Predicates = [HasMVEInt];
6847 let Inst{21-20} = size;
6848 let Inst{19-16} = Rn{3-0};
// DLSTP: "do-loop start, tail predicated" — no branch target, so the
// label field bits are zero (and unpredictable per the architecture).
6852 class MVE_DLSTP<string asm, bits<2> size>
6853 : MVE_loltp_start<(ins rGPR:$Rn), asm, "$LR, $Rn", size> {
6855 let Inst{11-1} = 0b00000000000;
6856 let Unpredictable{10-1} = 0b1111111111;
// WLSTP: "while-loop start, tail predicated" — additionally encodes the
// loop-end branch label into Inst{11} and Inst{10-1}.
6859 class MVE_WLSTP<string asm, bits<2> size>
6860 : MVE_loltp_start<(ins rGPR:$Rn, wlslabel_u11:$label),
6861 asm, "$LR, $Rn, $label", size> {
6864 let Inst{11} = label{0};
6865 let Inst{10-1} = label{10-1};
// WLSTP can branch past the loop, so it terminates its basic block.
6867 let isTerminator = 1;
// Pseudo-instructions for lowering memcpy/memset as MVE loops. Each SDNode
// takes (dst, src, size); the pseudos are expanded by a custom inserter and
// may clobber CPSR, hence Defs = [CPSR].
6870 def SDT_MVEMEMCPYLOOPNODE
6871 : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>]>;
6872 def MVE_MEMCPYLOOPNODE : SDNode<"ARMISD::MEMCPYLOOP", SDT_MVEMEMCPYLOOPNODE,
6873 [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
6875 let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [CPSR] in {
6876 def MVE_MEMCPYLOOPINST : PseudoInst<(outs),
6877 (ins rGPR:$dst, rGPR:$src, rGPR:$sz),
6879 [(MVE_MEMCPYLOOPNODE rGPR:$dst, rGPR:$src, rGPR:$sz)]>;
// Memset variant: the "source" operand is the v16i8 splat value to store.
6882 def SDT_MVEMEMSETLOOPNODE
6883 : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisVT<1, v16i8>, SDTCisVT<2, i32>]>;
6884 def MVE_MEMSETLOOPNODE : SDNode<"ARMISD::MEMSETLOOP", SDT_MVEMEMSETLOOPNODE,
6885 [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
6887 let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [CPSR] in {
6888 def MVE_MEMSETLOOPINST : PseudoInst<(outs),
6889 (ins rGPR:$dst, MQPR:$src, rGPR:$sz),
6891 [(MVE_MEMSETLOOPNODE rGPR:$dst, MQPR:$src, rGPR:$sz)]>;
// Concrete DLSTP/WLSTP instructions, one per element size.
// size encoding: 0b00 = 8-bit, 0b01 = 16-bit, 0b10 = 32-bit, 0b11 = 64-bit.
6894 def MVE_DLSTP_8 : MVE_DLSTP<"dlstp.8", 0b00>;
6895 def MVE_DLSTP_16 : MVE_DLSTP<"dlstp.16", 0b01>;
6896 def MVE_DLSTP_32 : MVE_DLSTP<"dlstp.32", 0b10>;
6897 def MVE_DLSTP_64 : MVE_DLSTP<"dlstp.64", 0b11>;
6899 def MVE_WLSTP_8 : MVE_WLSTP<"wlstp.8", 0b00>;
6900 def MVE_WLSTP_16 : MVE_WLSTP<"wlstp.16", 0b01>;
6901 def MVE_WLSTP_32 : MVE_WLSTP<"wlstp.32", 0b10>;
6902 def MVE_WLSTP_64 : MVE_WLSTP<"wlstp.64", 0b11>;
// Tail-predicated low-overhead loop "end" instructions. The base class
// fixes Inst{22-21} and sets the Rn field (Inst{19-16}) to 0b1111.
6904 class MVE_loltp_end<dag oops, dag iops, string asm, string ops>
6905 : t2LOL<oops, iops, asm, ops> {
6906 let Predicates = [HasMVEInt];
6907 let Inst{22-21} = 0b00;
6908 let Inst{19-16} = 0b1111;
// LETP: loop-end, tail predicated — decrements LR and branches back to
// $label; a block terminator like other loop-ending branches.
6912 def MVE_LETP : MVE_loltp_end<(outs GPRlr:$LRout),
6913 (ins GPRlr:$LRin, lelabel_u11:$label),
6914 "letp", "$LRin, $label"> {
6918 let Inst{11} = label{0};
6919 let Inst{10-1} = label{10-1};
6921 let isTerminator = 1;
// LCTP: loop-clear, tail predicated — no operands beyond the IT predicate;
// the label field is zero and the marked bits are unpredictable.
6924 def MVE_LCTP : MVE_loltp_end<(outs), (ins pred:$p), "lctp${p}", ""> {
6927 let Inst{11-1} = 0b00000000000;
6928 let Unpredictable{21-20} = 0b11;
6929 let Unpredictable{11-1} = 0b11111111111;
6933 // Pseudo instructions for lowering MQQPR and MQQQQPR stack spills and reloads.
6934 // They are equivalent to VLDMDIA/VSTMDIA with a single reg, as opposed to multiple
// AddrMode4 matches the VLDM/VSTM multiple-register addressing mode.
6937 let Predicates = [HasMVEInt], AM = AddrMode4 in {
6938 let mayStore = 1, hasSideEffects = 0 in {
6939 def MQQPRStore : t2PseudoInst<(outs), (ins MQQPR:$val, GPRnopc:$ptr),
6940 4, NoItinerary, []>;
6941 def MQQQQPRStore : t2PseudoInst<(outs), (ins MQQQQPR:$val, GPRnopc:$ptr),
6942 4, NoItinerary, []>;
6944 let mayLoad = 1, hasSideEffects = 0 in {
6945 def MQQPRLoad : t2PseudoInst<(outs MQQPR:$val), (ins GPRnopc:$ptr),
6946 4, NoItinerary, []>;
6947 def MQQQQPRLoad : t2PseudoInst<(outs MQQQQPR:$val), (ins GPRnopc:$ptr),
6948 4, NoItinerary, []>;
6953 //===----------------------------------------------------------------------===//
6955 //===----------------------------------------------------------------------===//
6957 // PatFrags for loads and stores. Often trying to keep semi-consistent names.
// Pre/post-indexed vector stores restricted by alignment: "aligned32"
// requires at least 4-byte alignment, "aligned16" at least 2-byte.
6959 def aligned32_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
6960 (pre_store node:$val, node:$ptr, node:$offset), [{
6961 return cast<StoreSDNode>(N)->getAlignment() >= 4;
6963 def aligned32_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
6964 (post_store node:$val, node:$ptr, node:$offset), [{
6965 return cast<StoreSDNode>(N)->getAlignment() >= 4;
6967 def aligned16_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
6968 (pre_store node:$val, node:$ptr, node:$offset), [{
6969 return cast<StoreSDNode>(N)->getAlignment() >= 2;
6971 def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
6972 (post_store node:$val, node:$ptr, node:$offset), [{
6973 return cast<StoreSDNode>(N)->getAlignment() >= 2;
// Masked-load PatFrags, selected by memory element type. The i8 variants
// need no alignment predicate (byte accesses are always aligned); i16/f16
// require alignment >= 2 and i32/f32 require alignment >= 4. The sext/zext/
// ext variants additionally filter on the load's extension type.
6977 def aligned_maskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
6978 (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{
6979 auto *Ld = cast<MaskedLoadSDNode>(N);
6980 return Ld->getMemoryVT().getScalarType() == MVT::i8;
6982 def aligned_sextmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
6983 (aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{
6984 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
6986 def aligned_zextmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
6987 (aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{
6988 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
6990 def aligned_extmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
6991 (aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{
6992 auto *Ld = cast<MaskedLoadSDNode>(N);
6993 EVT ScalarVT = Ld->getMemoryVT().getScalarType();
6994 return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD;
6996 def aligned_maskedloadvi16: PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
6997 (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{
6998 auto *Ld = cast<MaskedLoadSDNode>(N);
6999 EVT ScalarVT = Ld->getMemoryVT().getScalarType();
7000 return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && Ld->getAlignment() >= 2;
7002 def aligned_sextmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
7003 (aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{
7004 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
7006 def aligned_zextmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
7007 (aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{
7008 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
7010 def aligned_extmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
7011 (aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{
7012 auto *Ld = cast<MaskedLoadSDNode>(N);
7013 EVT ScalarVT = Ld->getMemoryVT().getScalarType();
7014 return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD;
7016 def aligned_maskedloadvi32: PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
7017 (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{
7018 auto *Ld = cast<MaskedLoadSDNode>(N);
7019 EVT ScalarVT = Ld->getMemoryVT().getScalarType();
7020 return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && Ld->getAlignment() >= 4;
// Masked-store PatFrags by element type, mirroring the load frags above:
// i8 needs no alignment check, i16/f16 needs >= 2, i32/f32 needs >= 4.
7023 def aligned_maskedstvi8 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
7024 (masked_st node:$val, node:$ptr, undef, node:$pred), [{
7025 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
7027 def aligned_maskedstvi16 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
7028 (masked_st node:$val, node:$ptr, undef, node:$pred), [{
7029 auto *St = cast<MaskedStoreSDNode>(N);
7030 EVT ScalarVT = St->getMemoryVT().getScalarType();
7031 return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
7033 def aligned_maskedstvi32 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
7034 (masked_st node:$val, node:$ptr, undef, node:$pred), [{
7035 auto *St = cast<MaskedStoreSDNode>(N);
7036 EVT ScalarVT = St->getMemoryVT().getScalarType();
7037 return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4;
// Pre/post-indexed masked stores, distinguished by addressing mode.
7040 def pre_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$mask),
7041 (masked_st node:$val, node:$base, node:$offset, node:$mask), [{
7042 ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode();
7043 return AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
7045 def post_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$mask),
7046 (masked_st node:$val, node:$base, node:$offset, node:$mask), [{
7047 ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode();
7048 return AM == ISD::POST_INC || AM == ISD::POST_DEC;
// Pre/post-indexed masked stores further restricted by element type and
// alignment, combining the two families above.
7050 def aligned_pre_maskedstorevi8 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
7051 (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
7052 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
7054 def aligned_post_maskedstorevi8 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
7055 (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
7056 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
7058 def aligned_pre_maskedstorevi16 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
7059 (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
7060 auto *St = cast<MaskedStoreSDNode>(N);
7061 EVT ScalarVT = St->getMemoryVT().getScalarType();
7062 return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
7064 def aligned_post_maskedstorevi16 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
7065 (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
7066 auto *St = cast<MaskedStoreSDNode>(N);
7067 EVT ScalarVT = St->getMemoryVT().getScalarType();
7068 return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
7070 def aligned_pre_maskedstorevi32 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
7071 (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
7072 auto *St = cast<MaskedStoreSDNode>(N);
7073 EVT ScalarVT = St->getMemoryVT().getScalarType();
7074 return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4;
7076 def aligned_post_maskedstorevi32 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
7077 (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
7078 auto *St = cast<MaskedStoreSDNode>(N);
7079 EVT ScalarVT = St->getMemoryVT().getScalarType();
7080 return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4;
7084 // PatFrags for "Aligned" extending / truncating
// Byte-sized accesses impose no alignment requirement, so the vi8 frags
// are plain aliases of the generic ext-load / trunc-store frags.
7086 def aligned_extloadvi8 : PatFrag<(ops node:$ptr), (extloadvi8 node:$ptr)>;
7087 def aligned_sextloadvi8 : PatFrag<(ops node:$ptr), (sextloadvi8 node:$ptr)>;
7088 def aligned_zextloadvi8 : PatFrag<(ops node:$ptr), (zextloadvi8 node:$ptr)>;
7090 def aligned_truncstvi8 : PatFrag<(ops node:$val, node:$ptr),
7091 (truncstorevi8 node:$val, node:$ptr)>;
7092 def aligned_post_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset),
7093 (post_truncstvi8 node:$val, node:$base, node:$offset)>;
7094 def aligned_pre_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset),
7095 (pre_truncstvi8 node:$val, node:$base, node:$offset)>;
// Halfword frags use the declarative MinAlignment field instead of a C++
// predicate to require at least 2-byte alignment.
7097 let MinAlignment = 2 in {
7098 def aligned_extloadvi16 : PatFrag<(ops node:$ptr), (extloadvi16 node:$ptr)>;
7099 def aligned_sextloadvi16 : PatFrag<(ops node:$ptr), (sextloadvi16 node:$ptr)>;
7100 def aligned_zextloadvi16 : PatFrag<(ops node:$ptr), (zextloadvi16 node:$ptr)>;
7102 def aligned_truncstvi16 : PatFrag<(ops node:$val, node:$ptr),
7103 (truncstorevi16 node:$val, node:$ptr)>;
7104 def aligned_post_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset),
7105 (post_truncstvi16 node:$val, node:$base, node:$offset)>;
7106 def aligned_pre_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset),
7107 (pre_truncstvi16 node:$val, node:$base, node:$offset)>;
// Truncating masked stores: the base frag matches any truncating masked
// store, and the viN variants narrow it by element type / alignment, with
// pre/post-indexed forms additionally checking the addressing mode.
7110 def truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$pred),
7111 (masked_st node:$val, node:$base, undef, node:$pred), [{
7112 return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
7114 def aligned_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$pred),
7115 (truncmaskedst node:$val, node:$base, node:$pred), [{
7116 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
7118 def aligned_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$pred),
7119 (truncmaskedst node:$val, node:$base, node:$pred), [{
7120 auto *St = cast<MaskedStoreSDNode>(N);
7121 EVT ScalarVT = St->getMemoryVT().getScalarType();
7122 return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
7124 def pre_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred),
7125 (masked_st node:$val, node:$base, node:$offset, node:$pred), [{
7126 ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode();
7127 return cast<MaskedStoreSDNode>(N)->isTruncatingStore() && (AM == ISD::PRE_INC || AM == ISD::PRE_DEC);
7129 def aligned_pre_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred),
7130 (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{
7131 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
7133 def aligned_pre_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred),
7134 (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{
7135 auto *St = cast<MaskedStoreSDNode>(N);
7136 EVT ScalarVT = St->getMemoryVT().getScalarType();
7137 return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
7139 def post_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd),
7140 (masked_st node:$val, node:$base, node:$offset, node:$postd), [{
7141 ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode();
7142 return cast<MaskedStoreSDNode>(N)->isTruncatingStore() && (AM == ISD::POST_INC || AM == ISD::POST_DEC);
7144 def aligned_post_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd),
7145 (post_truncmaskedst node:$val, node:$base, node:$offset, node:$postd), [{
7146 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
7148 def aligned_post_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd),
7149 (post_truncmaskedst node:$val, node:$base, node:$offset, node:$postd), [{
7150 auto *St = cast<MaskedStoreSDNode>(N);
7151 EVT ScalarVT = St->getMemoryVT().getScalarType();
7152 return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
7155 // Load/store patterns
// Pattern-class helpers that map a (possibly masked) vector load/store of a
// given value type onto a concrete VLDR/VSTR instruction. 'shift' is the
// imm7 offset scale of the addressing mode; masked forms become
// ARMVCCThen-predicated instructions. The multiclasses instantiate one
// pattern per MVE vector type.
7157 class MVE_vector_store_typed<ValueType Ty, Instruction RegImmInst,
7158 PatFrag StoreKind, int shift>
7159 : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr),
7160 (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr)>;
7162 class MVE_vector_maskedstore_typed<ValueType Ty, Instruction RegImmInst,
7163 PatFrag StoreKind, int shift>
7164 : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, VCCR:$pred),
7165 (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred)>;
7167 multiclass MVE_vector_store<Instruction RegImmInst, PatFrag StoreKind,
7169 def : MVE_vector_store_typed<v16i8, RegImmInst, StoreKind, shift>;
7170 def : MVE_vector_store_typed<v8i16, RegImmInst, StoreKind, shift>;
7171 def : MVE_vector_store_typed<v8f16, RegImmInst, StoreKind, shift>;
7172 def : MVE_vector_store_typed<v4i32, RegImmInst, StoreKind, shift>;
7173 def : MVE_vector_store_typed<v4f32, RegImmInst, StoreKind, shift>;
7174 def : MVE_vector_store_typed<v2i64, RegImmInst, StoreKind, shift>;
7175 def : MVE_vector_store_typed<v2f64, RegImmInst, StoreKind, shift>;
// Load equivalents of the two store classes; the masked form requires a
// zero-vector passthru (ARMvmovImm 0), matching the instruction's behavior
// of zeroing inactive lanes.
7178 class MVE_vector_load_typed<ValueType Ty, Instruction RegImmInst,
7179 PatFrag LoadKind, int shift>
7180 : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr)),
7181 (Ty (RegImmInst t2addrmode_imm7<shift>:$addr))>;
7183 class MVE_vector_maskedload_typed<ValueType Ty, Instruction RegImmInst,
7184 PatFrag LoadKind, int shift>
7185 : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty (ARMvmovImm (i32 0))))),
7186 (Ty (RegImmInst t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred))>;
7188 multiclass MVE_vector_load<Instruction RegImmInst, PatFrag LoadKind,
7190 def : MVE_vector_load_typed<v16i8, RegImmInst, LoadKind, shift>;
7191 def : MVE_vector_load_typed<v8i16, RegImmInst, LoadKind, shift>;
7192 def : MVE_vector_load_typed<v8f16, RegImmInst, LoadKind, shift>;
7193 def : MVE_vector_load_typed<v4i32, RegImmInst, LoadKind, shift>;
7194 def : MVE_vector_load_typed<v4f32, RegImmInst, LoadKind, shift>;
7195 def : MVE_vector_load_typed<v2i64, RegImmInst, LoadKind, shift>;
7196 def : MVE_vector_load_typed<v2f64, RegImmInst, LoadKind, shift>;
// Pre/post-indexed ("offset") store variants: base register plus a separate
// t2am_imm7_offset operand instead of a combined address operand.
7199 class MVE_vector_offset_store_typed<ValueType Ty, Instruction Opcode,
7200 PatFrag StoreKind, int shift>
7201 : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr),
7202 (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr)>;
7204 class MVE_vector_offset_maskedstore_typed<ValueType Ty, Instruction Opcode,
7205 PatFrag StoreKind, int shift>
7206 : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr, VCCR:$pred),
7207 (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr, ARMVCCThen, VCCR:$pred)>;
7209 multiclass MVE_vector_offset_store<Instruction RegImmInst, PatFrag StoreKind,
7211 def : MVE_vector_offset_store_typed<v16i8, RegImmInst, StoreKind, shift>;
7212 def : MVE_vector_offset_store_typed<v8i16, RegImmInst, StoreKind, shift>;
7213 def : MVE_vector_offset_store_typed<v8f16, RegImmInst, StoreKind, shift>;
7214 def : MVE_vector_offset_store_typed<v4i32, RegImmInst, StoreKind, shift>;
7215 def : MVE_vector_offset_store_typed<v4f32, RegImmInst, StoreKind, shift>;
7216 def : MVE_vector_offset_store_typed<v2i64, RegImmInst, StoreKind, shift>;
7217 def : MVE_vector_offset_store_typed<v2f64, RegImmInst, StoreKind, shift>;
// Little-endian: any vector type can use byte/halfword/word loads and
// stores directly (no lane reversal needed), chosen by alignment.
7221 let Predicates = [HasMVEInt, IsLE] in {
7223 defm : MVE_vector_store<MVE_VSTRBU8, byte_alignedstore, 0>;
7224 defm : MVE_vector_store<MVE_VSTRHU16, hword_alignedstore, 1>;
7225 defm : MVE_vector_store<MVE_VSTRWU32, alignedstore32, 2>;
7228 defm : MVE_vector_load<MVE_VLDRBU8, byte_alignedload, 0>;
7229 defm : MVE_vector_load<MVE_VLDRHU16, hword_alignedload, 1>;
7230 defm : MVE_vector_load<MVE_VLDRWU32, alignedload32, 2>;
7232 // Pre/post inc stores
7233 defm : MVE_vector_offset_store<MVE_VSTRBU8_pre, pre_store, 0>;
7234 defm : MVE_vector_offset_store<MVE_VSTRBU8_post, post_store, 0>;
7235 defm : MVE_vector_offset_store<MVE_VSTRHU16_pre, aligned16_pre_store, 1>;
7236 defm : MVE_vector_offset_store<MVE_VSTRHU16_post, aligned16_post_store, 1>;
7237 defm : MVE_vector_offset_store<MVE_VSTRWU32_pre, aligned32_pre_store, 2>;
7238 defm : MVE_vector_offset_store<MVE_VSTRWU32_post, aligned32_post_store, 2>;
// Big-endian: loads/stores must match the element size of the access, so
// each vector type is pinned to the instruction of its own element width.
7241 let Predicates = [HasMVEInt, IsBE] in {
7243 def : MVE_vector_store_typed<v16i8, MVE_VSTRBU8, store, 0>;
7244 def : MVE_vector_store_typed<v8i16, MVE_VSTRHU16, alignedstore16, 1>;
7245 def : MVE_vector_store_typed<v8f16, MVE_VSTRHU16, alignedstore16, 1>;
7246 def : MVE_vector_store_typed<v4i32, MVE_VSTRWU32, alignedstore32, 2>;
7247 def : MVE_vector_store_typed<v4f32, MVE_VSTRWU32, alignedstore32, 2>;
7250 def : MVE_vector_load_typed<v16i8, MVE_VLDRBU8, load, 0>;
7251 def : MVE_vector_load_typed<v8i16, MVE_VLDRHU16, alignedload16, 1>;
7252 def : MVE_vector_load_typed<v8f16, MVE_VLDRHU16, alignedload16, 1>;
7253 def : MVE_vector_load_typed<v4i32, MVE_VLDRWU32, alignedload32, 2>;
7254 def : MVE_vector_load_typed<v4f32, MVE_VLDRWU32, alignedload32, 2>;
7256 // Other unaligned loads/stores need to go through a VREV
7257 def : Pat<(v2f64 (load t2addrmode_imm7<0>:$addr)),
7258 (v2f64 (MVE_VREV64_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
7259 def : Pat<(v2i64 (load t2addrmode_imm7<0>:$addr)),
7260 (v2i64 (MVE_VREV64_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
7261 def : Pat<(v4i32 (load t2addrmode_imm7<0>:$addr)),
7262 (v4i32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
7263 def : Pat<(v4f32 (load t2addrmode_imm7<0>:$addr)),
7264 (v4f32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
7265 def : Pat<(v8i16 (load t2addrmode_imm7<0>:$addr)),
7266 (v8i16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
7267 def : Pat<(v8f16 (load t2addrmode_imm7<0>:$addr)),
7268 (v8f16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
7269 def : Pat<(store (v2f64 MQPR:$val), t2addrmode_imm7<0>:$addr),
7270 (MVE_VSTRBU8 (MVE_VREV64_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
7271 def : Pat<(store (v2i64 MQPR:$val), t2addrmode_imm7<0>:$addr),
7272 (MVE_VSTRBU8 (MVE_VREV64_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
7273 def : Pat<(store (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr),
7274 (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
7275 def : Pat<(store (v4f32 MQPR:$val), t2addrmode_imm7<0>:$addr),
7276 (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
7277 def : Pat<(store (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr),
7278 (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
7279 def : Pat<(store (v8f16 MQPR:$val), t2addrmode_imm7<0>:$addr),
7280 (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
7282 // Pre/Post inc stores
7283 def : MVE_vector_offset_store_typed<v16i8, MVE_VSTRBU8_pre, pre_store, 0>;
7284 def : MVE_vector_offset_store_typed<v16i8, MVE_VSTRBU8_post, post_store, 0>;
7285 def : MVE_vector_offset_store_typed<v8i16, MVE_VSTRHU16_pre, aligned16_pre_store, 1>;
7286 def : MVE_vector_offset_store_typed<v8i16, MVE_VSTRHU16_post, aligned16_post_store, 1>;
7287 def : MVE_vector_offset_store_typed<v8f16, MVE_VSTRHU16_pre, aligned16_pre_store, 1>;
7288 def : MVE_vector_offset_store_typed<v8f16, MVE_VSTRHU16_post, aligned16_post_store, 1>;
7289 def : MVE_vector_offset_store_typed<v4i32, MVE_VSTRWU32_pre, aligned32_pre_store, 2>;
7290 def : MVE_vector_offset_store_typed<v4i32, MVE_VSTRWU32_post, aligned32_post_store, 2>;
7291 def : MVE_vector_offset_store_typed<v4f32, MVE_VSTRWU32_pre, aligned32_pre_store, 2>;
7292 def : MVE_vector_offset_store_typed<v4f32, MVE_VSTRWU32_post, aligned32_post_store, 2>;
7295 let Predicates = [HasMVEInt] in {
7296 // Aligned masked store, shared between LE and BE
7297 def : MVE_vector_maskedstore_typed<v16i8, MVE_VSTRBU8, aligned_maskedstvi8, 0>;
7298 def : MVE_vector_maskedstore_typed<v8i16, MVE_VSTRHU16, aligned_maskedstvi16, 1>;
7299 def : MVE_vector_maskedstore_typed<v8f16, MVE_VSTRHU16, aligned_maskedstvi16, 1>;
7300 def : MVE_vector_maskedstore_typed<v4i32, MVE_VSTRWU32, aligned_maskedstvi32, 2>;
7301 def : MVE_vector_maskedstore_typed<v4f32, MVE_VSTRWU32, aligned_maskedstvi32, 2>;
7303 // Pre/Post inc masked stores
7304 def : MVE_vector_offset_maskedstore_typed<v16i8, MVE_VSTRBU8_pre, aligned_pre_maskedstorevi8, 0>;
7305 def : MVE_vector_offset_maskedstore_typed<v16i8, MVE_VSTRBU8_post, aligned_post_maskedstorevi8, 0>;
7306 def : MVE_vector_offset_maskedstore_typed<v8i16, MVE_VSTRHU16_pre, aligned_pre_maskedstorevi16, 1>;
7307 def : MVE_vector_offset_maskedstore_typed<v8i16, MVE_VSTRHU16_post, aligned_post_maskedstorevi16, 1>;
7308 def : MVE_vector_offset_maskedstore_typed<v8f16, MVE_VSTRHU16_pre, aligned_pre_maskedstorevi16, 1>;
7309 def : MVE_vector_offset_maskedstore_typed<v8f16, MVE_VSTRHU16_post, aligned_post_maskedstorevi16, 1>;
7310 def : MVE_vector_offset_maskedstore_typed<v4i32, MVE_VSTRWU32_pre, aligned_pre_maskedstorevi32, 2>;
7311 def : MVE_vector_offset_maskedstore_typed<v4i32, MVE_VSTRWU32_post, aligned_post_maskedstorevi32, 2>;
7312 def : MVE_vector_offset_maskedstore_typed<v4f32, MVE_VSTRWU32_pre, aligned_pre_maskedstorevi32, 2>;
7313 def : MVE_vector_offset_maskedstore_typed<v4f32, MVE_VSTRWU32_post, aligned_post_maskedstorevi32, 2>;
7315 // Aligned masked loads
7316 def : MVE_vector_maskedload_typed<v16i8, MVE_VLDRBU8, aligned_maskedloadvi8, 0>;
7317 def : MVE_vector_maskedload_typed<v8i16, MVE_VLDRHU16, aligned_maskedloadvi16, 1>;
7318 def : MVE_vector_maskedload_typed<v8f16, MVE_VLDRHU16, aligned_maskedloadvi16, 1>;
7319 def : MVE_vector_maskedload_typed<v4i32, MVE_VLDRWU32, aligned_maskedloadvi32, 2>;
7320 def : MVE_vector_maskedload_typed<v4f32, MVE_VLDRWU32, aligned_maskedloadvi32, 2>;
7323 // Widening/Narrowing Loads/Stores
// One multiclass instantiation covers all widening-load / narrowing-store
// patterns for a (wide VT, narrow memory type) pair: plain, pre/post-
// indexed, and masked variants. 'Amble' names the PatFrag suffix (e.g.
// "vi8"), 'StoreInst' the narrowing store instruction by name (so its
// _pre/_post siblings can be derived with !cast), and 'Shift' the imm7
// offset scale.
7325 multiclass MVEExtLoadStore<Instruction LoadSInst, Instruction LoadUInst, string StoreInst,
7326 string Amble, ValueType VT, int Shift> {
7328 def : Pat<(!cast<PatFrag>("aligned_truncst"#Amble) (VT MQPR:$val), taddrmode_imm7<Shift>:$addr),
7329 (!cast<Instruction>(StoreInst) MQPR:$val, taddrmode_imm7<Shift>:$addr)>;
7330 def : Pat<(!cast<PatFrag>("aligned_post_truncst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr),
7331 (!cast<Instruction>(StoreInst#"_post") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr)>;
7332 def : Pat<(!cast<PatFrag>("aligned_pre_truncst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr),
7333 (!cast<Instruction>(StoreInst#"_pre") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr)>;
7335 // Masked trunc stores
7336 def : Pat<(!cast<PatFrag>("aligned_truncmaskedst"#Amble) (VT MQPR:$val), taddrmode_imm7<Shift>:$addr, VCCR:$pred),
7337 (!cast<Instruction>(StoreInst) MQPR:$val, taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred)>;
7338 def : Pat<(!cast<PatFrag>("aligned_post_truncmaskedst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, VCCR:$pred),
7339 (!cast<Instruction>(StoreInst#"_post") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, ARMVCCThen, VCCR:$pred)>;
7340 def : Pat<(!cast<PatFrag>("aligned_pre_truncmaskedst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, VCCR:$pred),
7341 (!cast<Instruction>(StoreInst#"_pre") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, ARMVCCThen, VCCR:$pred)>;
// Widening loads: anyext uses the unsigned-load instruction.
7344 def : Pat<(VT (!cast<PatFrag>("aligned_extload"#Amble) taddrmode_imm7<Shift>:$addr)),
7345 (VT (LoadUInst taddrmode_imm7<Shift>:$addr))>;
7346 def : Pat<(VT (!cast<PatFrag>("aligned_sextload"#Amble) taddrmode_imm7<Shift>:$addr)),
7347 (VT (LoadSInst taddrmode_imm7<Shift>:$addr))>;
7348 def : Pat<(VT (!cast<PatFrag>("aligned_zextload"#Amble) taddrmode_imm7<Shift>:$addr)),
7349 (VT (LoadUInst taddrmode_imm7<Shift>:$addr))>;
// Masked widening loads require the zero-vector passthru, matching the
// instructions' zeroing of inactive lanes.
7352 def : Pat<(VT (!cast<PatFrag>("aligned_extmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))),
7353 (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
7354 def : Pat<(VT (!cast<PatFrag>("aligned_sextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))),
7355 (VT (LoadSInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
7356 def : Pat<(VT (!cast<PatFrag>("aligned_zextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))),
7357 (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
// The three widening/narrowing combinations: i8<->i16, i8<->i32, i16<->i32.
7360 let Predicates = [HasMVEInt] in {
7361 defm : MVEExtLoadStore<MVE_VLDRBS16, MVE_VLDRBU16, "MVE_VSTRB16", "vi8", v8i16, 0>;
7362 defm : MVEExtLoadStore<MVE_VLDRBS32, MVE_VLDRBU32, "MVE_VSTRB32", "vi8", v4i32, 0>;
7363 defm : MVEExtLoadStore<MVE_VLDRHS32, MVE_VLDRHU32, "MVE_VSTRH32", "vi16", v4i32, 1>;
7367 // Bit convert patterns
// Bitconverts between types with the same element width are no-ops on both
// endiannesses, so they are guarded only by HasMVEInt.
7369 let Predicates = [HasMVEInt] in {
7370 def : Pat<(v2f64 (bitconvert (v2i64 MQPR:$src))), (v2f64 MQPR:$src)>;
7371 def : Pat<(v2i64 (bitconvert (v2f64 MQPR:$src))), (v2i64 MQPR:$src)>;
7373 def : Pat<(v4i32 (bitconvert (v4f32 MQPR:$src))), (v4i32 MQPR:$src)>;
7374 def : Pat<(v4f32 (bitconvert (v4i32 MQPR:$src))), (v4f32 MQPR:$src)>;
7376 def : Pat<(v8i16 (bitconvert (v8f16 MQPR:$src))), (v8i16 MQPR:$src)>;
7377 def : Pat<(v8f16 (bitconvert (v8i16 MQPR:$src))), (v8f16 MQPR:$src)>;
// Little-endian: bitconverts between different element widths are also
// register-only no-ops (lane bytes already in memory order).
7380 let Predicates = [IsLE,HasMVEInt] in {
7381 def : Pat<(v2f64 (bitconvert (v4f32 MQPR:$src))), (v2f64 MQPR:$src)>;
7382 def : Pat<(v2f64 (bitconvert (v4i32 MQPR:$src))), (v2f64 MQPR:$src)>;
7383 def : Pat<(v2f64 (bitconvert (v8f16 MQPR:$src))), (v2f64 MQPR:$src)>;
7384 def : Pat<(v2f64 (bitconvert (v8i16 MQPR:$src))), (v2f64 MQPR:$src)>;
7385 def : Pat<(v2f64 (bitconvert (v16i8 MQPR:$src))), (v2f64 MQPR:$src)>;
7387 def : Pat<(v2i64 (bitconvert (v4f32 MQPR:$src))), (v2i64 MQPR:$src)>;
7388 def : Pat<(v2i64 (bitconvert (v4i32 MQPR:$src))), (v2i64 MQPR:$src)>;
7389 def : Pat<(v2i64 (bitconvert (v8f16 MQPR:$src))), (v2i64 MQPR:$src)>;
7390 def : Pat<(v2i64 (bitconvert (v8i16 MQPR:$src))), (v2i64 MQPR:$src)>;
7391 def : Pat<(v2i64 (bitconvert (v16i8 MQPR:$src))), (v2i64 MQPR:$src)>;
7393 def : Pat<(v4f32 (bitconvert (v2f64 MQPR:$src))), (v4f32 MQPR:$src)>;
7394 def : Pat<(v4f32 (bitconvert (v2i64 MQPR:$src))), (v4f32 MQPR:$src)>;
7395 def : Pat<(v4f32 (bitconvert (v8f16 MQPR:$src))), (v4f32 MQPR:$src)>;
7396 def : Pat<(v4f32 (bitconvert (v8i16 MQPR:$src))), (v4f32 MQPR:$src)>;
7397 def : Pat<(v4f32 (bitconvert (v16i8 MQPR:$src))), (v4f32 MQPR:$src)>;
7399 def : Pat<(v4i32 (bitconvert (v2f64 MQPR:$src))), (v4i32 MQPR:$src)>;
7400 def : Pat<(v4i32 (bitconvert (v2i64 MQPR:$src))), (v4i32 MQPR:$src)>;
7401 def : Pat<(v4i32 (bitconvert (v8f16 MQPR:$src))), (v4i32 MQPR:$src)>;
7402 def : Pat<(v4i32 (bitconvert (v8i16 MQPR:$src))), (v4i32 MQPR:$src)>;
7403 def : Pat<(v4i32 (bitconvert (v16i8 MQPR:$src))), (v4i32 MQPR:$src)>;
7405 def : Pat<(v8f16 (bitconvert (v2f64 MQPR:$src))), (v8f16 MQPR:$src)>;
7406 def : Pat<(v8f16 (bitconvert (v2i64 MQPR:$src))), (v8f16 MQPR:$src)>;
7407 def : Pat<(v8f16 (bitconvert (v4f32 MQPR:$src))), (v8f16 MQPR:$src)>;
7408 def : Pat<(v8f16 (bitconvert (v4i32 MQPR:$src))), (v8f16 MQPR:$src)>;
7409 def : Pat<(v8f16 (bitconvert (v16i8 MQPR:$src))), (v8f16 MQPR:$src)>;
7411 def : Pat<(v8i16 (bitconvert (v2f64 MQPR:$src))), (v8i16 MQPR:$src)>;
7412 def : Pat<(v8i16 (bitconvert (v2i64 MQPR:$src))), (v8i16 MQPR:$src)>;
7413 def : Pat<(v8i16 (bitconvert (v4f32 MQPR:$src))), (v8i16 MQPR:$src)>;
7414 def : Pat<(v8i16 (bitconvert (v4i32 MQPR:$src))), (v8i16 MQPR:$src)>;
7415 def : Pat<(v8i16 (bitconvert (v16i8 MQPR:$src))), (v8i16 MQPR:$src)>;
7417 def : Pat<(v16i8 (bitconvert (v2f64 MQPR:$src))), (v16i8 MQPR:$src)>;
7418 def : Pat<(v16i8 (bitconvert (v2i64 MQPR:$src))), (v16i8 MQPR:$src)>;
7419 def : Pat<(v16i8 (bitconvert (v4f32 MQPR:$src))), (v16i8 MQPR:$src)>;
7420 def : Pat<(v16i8 (bitconvert (v4i32 MQPR:$src))), (v16i8 MQPR:$src)>;
7421 def : Pat<(v16i8 (bitconvert (v8f16 MQPR:$src))), (v16i8 MQPR:$src)>;
7422 def : Pat<(v16i8 (bitconvert (v8i16 MQPR:$src))), (v16i8 MQPR:$src)>;
7425 let Predicates = [IsBE,HasMVEInt] in {
7426 def : Pat<(v2f64 (bitconvert (v4f32 MQPR:$src))), (v2f64 (MVE_VREV64_32 MQPR:$src))>;
7427 def : Pat<(v2f64 (bitconvert (v4i32 MQPR:$src))), (v2f64 (MVE_VREV64_32 MQPR:$src))>;
7428 def : Pat<(v2f64 (bitconvert (v8f16 MQPR:$src))), (v2f64 (MVE_VREV64_16 MQPR:$src))>;
7429 def : Pat<(v2f64 (bitconvert (v8i16 MQPR:$src))), (v2f64 (MVE_VREV64_16 MQPR:$src))>;
7430 def : Pat<(v2f64 (bitconvert (v16i8 MQPR:$src))), (v2f64 (MVE_VREV64_8 MQPR:$src))>;
7432 def : Pat<(v2i64 (bitconvert (v4f32 MQPR:$src))), (v2i64 (MVE_VREV64_32 MQPR:$src))>;
7433 def : Pat<(v2i64 (bitconvert (v4i32 MQPR:$src))), (v2i64 (MVE_VREV64_32 MQPR:$src))>;
7434 def : Pat<(v2i64 (bitconvert (v8f16 MQPR:$src))), (v2i64 (MVE_VREV64_16 MQPR:$src))>;
7435 def : Pat<(v2i64 (bitconvert (v8i16 MQPR:$src))), (v2i64 (MVE_VREV64_16 MQPR:$src))>;
7436 def : Pat<(v2i64 (bitconvert (v16i8 MQPR:$src))), (v2i64 (MVE_VREV64_8 MQPR:$src))>;
7438 def : Pat<(v4f32 (bitconvert (v2f64 MQPR:$src))), (v4f32 (MVE_VREV64_32 MQPR:$src))>;
7439 def : Pat<(v4f32 (bitconvert (v2i64 MQPR:$src))), (v4f32 (MVE_VREV64_32 MQPR:$src))>;
7440 def : Pat<(v4f32 (bitconvert (v8f16 MQPR:$src))), (v4f32 (MVE_VREV32_16 MQPR:$src))>;
7441 def : Pat<(v4f32 (bitconvert (v8i16 MQPR:$src))), (v4f32 (MVE_VREV32_16 MQPR:$src))>;
7442 def : Pat<(v4f32 (bitconvert (v16i8 MQPR:$src))), (v4f32 (MVE_VREV32_8 MQPR:$src))>;
7444 def : Pat<(v4i32 (bitconvert (v2f64 MQPR:$src))), (v4i32 (MVE_VREV64_32 MQPR:$src))>;
7445 def : Pat<(v4i32 (bitconvert (v2i64 MQPR:$src))), (v4i32 (MVE_VREV64_32 MQPR:$src))>;
7446 def : Pat<(v4i32 (bitconvert (v8f16 MQPR:$src))), (v4i32 (MVE_VREV32_16 MQPR:$src))>;
7447 def : Pat<(v4i32 (bitconvert (v8i16 MQPR:$src))), (v4i32 (MVE_VREV32_16 MQPR:$src))>;
7448 def : Pat<(v4i32 (bitconvert (v16i8 MQPR:$src))), (v4i32 (MVE_VREV32_8 MQPR:$src))>;
7450 def : Pat<(v8f16 (bitconvert (v2f64 MQPR:$src))), (v8f16 (MVE_VREV64_16 MQPR:$src))>;
7451 def : Pat<(v8f16 (bitconvert (v2i64 MQPR:$src))), (v8f16 (MVE_VREV64_16 MQPR:$src))>;
7452 def : Pat<(v8f16 (bitconvert (v4f32 MQPR:$src))), (v8f16 (MVE_VREV32_16 MQPR:$src))>;
7453 def : Pat<(v8f16 (bitconvert (v4i32 MQPR:$src))), (v8f16 (MVE_VREV32_16 MQPR:$src))>;
7454 def : Pat<(v8f16 (bitconvert (v16i8 MQPR:$src))), (v8f16 (MVE_VREV16_8 MQPR:$src))>;
7456 def : Pat<(v8i16 (bitconvert (v2f64 MQPR:$src))), (v8i16 (MVE_VREV64_16 MQPR:$src))>;
7457 def : Pat<(v8i16 (bitconvert (v2i64 MQPR:$src))), (v8i16 (MVE_VREV64_16 MQPR:$src))>;
7458 def : Pat<(v8i16 (bitconvert (v4f32 MQPR:$src))), (v8i16 (MVE_VREV32_16 MQPR:$src))>;
7459 def : Pat<(v8i16 (bitconvert (v4i32 MQPR:$src))), (v8i16 (MVE_VREV32_16 MQPR:$src))>;
7460 def : Pat<(v8i16 (bitconvert (v16i8 MQPR:$src))), (v8i16 (MVE_VREV16_8 MQPR:$src))>;
7462 def : Pat<(v16i8 (bitconvert (v2f64 MQPR:$src))), (v16i8 (MVE_VREV64_8 MQPR:$src))>;
7463 def : Pat<(v16i8 (bitconvert (v2i64 MQPR:$src))), (v16i8 (MVE_VREV64_8 MQPR:$src))>;
7464 def : Pat<(v16i8 (bitconvert (v4f32 MQPR:$src))), (v16i8 (MVE_VREV32_8 MQPR:$src))>;
7465 def : Pat<(v16i8 (bitconvert (v4i32 MQPR:$src))), (v16i8 (MVE_VREV32_8 MQPR:$src))>;
7466 def : Pat<(v16i8 (bitconvert (v8f16 MQPR:$src))), (v16i8 (MVE_VREV16_8 MQPR:$src))>;
7467 def : Pat<(v16i8 (bitconvert (v8i16 MQPR:$src))), (v16i8 (MVE_VREV16_8 MQPR:$src))>;